Cache Aware Scheduling

[PATCH v3 14/21] sched/cache: Respect LLC preference in task migration and detach

Posted by Tim Chen 1 month, 2 weeks ago

During the final step of load balancing, can_migrate_task() now
considers a task's LLC preference before moving it out of its
preferred LLC.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Co-developed-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---

Notes:
    v2->v3:
    Use a mechanism similar to NUMA balancing: skip over tasks
    that would degrade locality in can_migrate_task(), and ignore
    that constraint only when nr_balance_failed is high enough.
    (Peter Zijlstra)
    
    Let migrate_degrades_locality(), which aims to migrate towards
    the preferred NUMA node, take precedence over
    migrate_degrades_llc(). (Peter Zijlstra)

 kernel/sched/fair.c  | 64 +++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h | 13 +++++++++
 2 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 03959a701514..d1145997b88d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9973,8 +9973,8 @@ static enum llc_mig can_migrate_llc(int src_cpu, int dst_cpu,
  * Check if task p can migrate from source LLC to
  * destination LLC in terms of cache aware load balance.
  */
-static __maybe_unused enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
-							struct task_struct *p)
+static enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
+					 struct task_struct *p)
 {
 	struct mm_struct *mm;
 	bool to_pref;
@@ -10041,6 +10041,47 @@ alb_break_llc(struct lb_env *env)
 
 	return false;
 }
+
+/*
+ * Check if migrating task p from env->src_cpu to
+ * env->dst_cpu breaks LLC locality.
+ */
+static bool migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
+{
+	if (!sched_cache_enabled())
+		return false;
+
+	if (task_has_sched_core(p))
+		return false;
+	/*
+	 * Skip over tasks that would degrade LLC locality;
+	 * only when nr_balance_failed is sufficiently high do we
+	 * ignore this constraint.
+	 *
+	 * The threshold for nr_balance_failed is set to
+	 * cache_nice_tries + 1 to avoid excessive task
+	 * migration at the same time. Refer to comments around
+	 * llc_balance().
+	 */
+	if (env->sd->nr_balance_failed >= env->sd->cache_nice_tries + 1)
+		return false;
+
+	/*
+	 * We know env->src_cpu has some tasks that prefer to
+	 * run on env->dst_cpu; skip the tasks that do not prefer
+	 * env->dst_cpu, and find one that does.
+	 */
+	if (env->migration_type == migrate_llc_task &&
+	    task_llc(p) != llc_id(env->dst_cpu))
+		return true;
+
+	if (can_migrate_llc_task(env->src_cpu,
+				 env->dst_cpu, p) != mig_forbid)
+		return false;
+
+	return true;
+}
+
 #else
 static inline bool get_llc_stats(int cpu, unsigned long *util,
 				 unsigned long *cap)
@@ -10053,6 +10094,12 @@ alb_break_llc(struct lb_env *env)
 {
 	return false;
 }
+
+static inline bool
+migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
+{
+	return false;
+}
 #endif
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
@@ -10150,10 +10197,19 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 		return 1;
 
 	degrades = migrate_degrades_locality(p, env);
-	if (!degrades)
+	if (!degrades) {
+		/*
+		 * If the NUMA locality is not broken,
+		 * further check if migration would hurt
+		 * LLC locality.
+		 */
+		if (migrate_degrades_llc(p, env))
+			return 0;
+
 		hot = task_hot(p, env);
-	else
+	} else {
 		hot = degrades > 0;
+	}
 
 	if (!hot || env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 		if (hot)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ac8c7ac1ac0d..c18e59f320a6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1495,6 +1495,14 @@ extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
 extern void sched_core_get(void);
 extern void sched_core_put(void);
 
+static inline bool task_has_sched_core(struct task_struct *p)
+{
+	if (sched_core_disabled())
+		return false;
+
+	return !!p->core_cookie;
+}
+
 #else /* !CONFIG_SCHED_CORE: */
 
 static inline bool sched_core_enabled(struct rq *rq)
@@ -1534,6 +1542,11 @@ static inline bool sched_group_cookie_match(struct rq *rq,
 	return true;
 }
 
+static inline bool task_has_sched_core(struct task_struct *p)
+{
+	return false;
+}
+
 #endif /* !CONFIG_SCHED_CORE */
 
 #ifdef CONFIG_RT_GROUP_SCHED
-- 
2.32.0

Re: [PATCH v3 14/21] sched/cache: Respect LLC preference in task migration and detach

Posted by Madadi Vineeth Reddy 1 month, 1 week ago

On 11/02/26 03:48, Tim Chen wrote:
> During the final step of load balancing, can_migrate_task() now
> considers a task's LLC preference before moving it out of its
> preferred LLC.
> 
> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
> Co-developed-by: Chen Yu <yu.c.chen@intel.com>
> Signed-off-by: Chen Yu <yu.c.chen@intel.com>
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> ---
> 
> Notes:
>     v2->v3:
>     Use the similar mechanism as NUMA balancing, which skips over
>     the tasks that would degrade locality in can_migrate_task();
>     and only if nr_balanced_failed is high enough do we ignore that.
>     (Peter Zijlstra)
>     
>     Let migrate_degrade_locality() take precedence over
>     migrate_degrades_llc(), which aims to migrate towards the preferred
>     NUMA node. (Peter Zijlstra)
> 
>  kernel/sched/fair.c  | 64 +++++++++++++++++++++++++++++++++++++++++---
>  kernel/sched/sched.h | 13 +++++++++
>  2 files changed, 73 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 03959a701514..d1145997b88d 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -9973,8 +9973,8 @@ static enum llc_mig can_migrate_llc(int src_cpu, int dst_cpu,
>   * Check if task p can migrate from source LLC to
>   * destination LLC in terms of cache aware load balance.
>   */
> -static __maybe_unused enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
> -							struct task_struct *p)
> +static enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
> +					 struct task_struct *p)
>  {
>  	struct mm_struct *mm;
>  	bool to_pref;
> @@ -10041,6 +10041,47 @@ alb_break_llc(struct lb_env *env)
>  
>  	return false;
>  }
> +
> +/*
> + * Check if migrating task p from env->src_cpu to
> + * env->dst_cpu breaks LLC localiy.
> + */
> +static bool migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
> +{
> +	if (!sched_cache_enabled())
> +		return false;
> +
> +	if (task_has_sched_core(p))
> +		return false;
> +	/*
> +	 * Skip over tasks that would degrade LLC locality;
> +	 * only when nr_balanced_failed is sufficiently high do we
> +	 * ignore this constraint.
> +	 *
> +	 * Threshold of cache_nice_tries is set to 1 higher
> +	 * than nr_balance_failed to avoid excessive task
> +	 * migration at the same time. Refer to comments around
> +	 * llc_balance().
> +	 */
> +	if (env->sd->nr_balance_failed >= env->sd->cache_nice_tries + 1)
> +		return false;
> +
> +	/*
> +	 * We know the env->src_cpu has some tasks prefer to
> +	 * run on env->dst_cpu, skip the tasks do not prefer
> +	 * env->dst_cpu, and find the one that prefers.
> +	 */
> +	if (env->migration_type == migrate_llc_task &&
> +	    task_llc(p) != llc_id(env->dst_cpu))
> +		return true;

`task_llc(p)` returns the LLC id of the CPU the task is currently running on, right?
Wouldn’t we need to check the task’s *preferred* LLC instead?

Am I missing something?

Thanks,
Vineeth

> +
> +	if (can_migrate_llc_task(env->src_cpu,
> +				 env->dst_cpu, p) != mig_forbid)
> +		return false;
> +
> +	return true;
> +}
> +
>  #else
>  static inline bool get_llc_stats(int cpu, unsigned long *util,
>  				 unsigned long *cap)
> @@ -10053,6 +10094,12 @@ alb_break_llc(struct lb_env *env)
>  {
>  	return false;
>  }
> +
> +static inline bool
> +migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
> +{
> +	return false;
> +}
>  #endif
>  /*
>   * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
> @@ -10150,10 +10197,19 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>  		return 1;
>  
>  	degrades = migrate_degrades_locality(p, env);
> -	if (!degrades)
> +	if (!degrades) {
> +		/*
> +		 * If the NUMA locality is not broken,
> +		 * further check if migration would hurt
> +		 * LLC locality.
> +		 */
> +		if (migrate_degrades_llc(p, env))
> +			return 0;
> +
>  		hot = task_hot(p, env);
> -	else
> +	} else {
>  		hot = degrades > 0;
> +	}
>  
>  	if (!hot || env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
>  		if (hot)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index ac8c7ac1ac0d..c18e59f320a6 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1495,6 +1495,14 @@ extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
>  extern void sched_core_get(void);
>  extern void sched_core_put(void);
>  
> +static inline bool task_has_sched_core(struct task_struct *p)
> +{
> +	if (sched_core_disabled())
> +		return false;
> +
> +	return !!p->core_cookie;
> +}
> +
>  #else /* !CONFIG_SCHED_CORE: */
>  
>  static inline bool sched_core_enabled(struct rq *rq)
> @@ -1534,6 +1542,11 @@ static inline bool sched_group_cookie_match(struct rq *rq,
>  	return true;
>  }
>  
> +static inline bool task_has_sched_core(struct task_struct *p)
> +{
> +	return false;
> +}
> +
>  #endif /* !CONFIG_SCHED_CORE */
>  
>  #ifdef CONFIG_RT_GROUP_SCHED

Re: [PATCH v3 14/21] sched/cache: Respect LLC preference in task migration and detach

Posted by Chen, Yu C 1 month, 1 week ago

On 2/18/2026 5:14 PM, Madadi Vineeth Reddy wrote:
> On 11/02/26 03:48, Tim Chen wrote:
>> During the final step of load balancing, can_migrate_task() now
>> considers a task's LLC preference before moving it out of its
>> preferred LLC.
>>
>> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
>> Co-developed-by: Chen Yu <yu.c.chen@intel.com>
>> Signed-off-by: Chen Yu <yu.c.chen@intel.com>
>> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
>> ---
>>
>> Notes:
>>      v2->v3:
>>      Use the similar mechanism as NUMA balancing, which skips over
>>      the tasks that would degrade locality in can_migrate_task();
>>      and only if nr_balanced_failed is high enough do we ignore that.
>>      (Peter Zijlstra)
>>      
>>      Let migrate_degrade_locality() take precedence over
>>      migrate_degrades_llc(), which aims to migrate towards the preferred
>>      NUMA node. (Peter Zijlstra)
>>
>>   kernel/sched/fair.c  | 64 +++++++++++++++++++++++++++++++++++++++++---
>>   kernel/sched/sched.h | 13 +++++++++
>>   2 files changed, 73 insertions(+), 4 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 03959a701514..d1145997b88d 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -9973,8 +9973,8 @@ static enum llc_mig can_migrate_llc(int src_cpu, int dst_cpu,
>>    * Check if task p can migrate from source LLC to
>>    * destination LLC in terms of cache aware load balance.
>>    */
>> -static __maybe_unused enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
>> -							struct task_struct *p)
>> +static enum llc_mig can_migrate_llc_task(int src_cpu, int dst_cpu,
>> +					 struct task_struct *p)
>>   {
>>   	struct mm_struct *mm;
>>   	bool to_pref;
>> @@ -10041,6 +10041,47 @@ alb_break_llc(struct lb_env *env)
>>   
>>   	return false;
>>   }
>> +
>> +/*
>> + * Check if migrating task p from env->src_cpu to
>> + * env->dst_cpu breaks LLC localiy.
>> + */
>> +static bool migrate_degrades_llc(struct task_struct *p, struct lb_env *env)
>> +{
>> +	if (!sched_cache_enabled())
>> +		return false;
>> +
>> +	if (task_has_sched_core(p))
>> +		return false;
>> +	/*
>> +	 * Skip over tasks that would degrade LLC locality;
>> +	 * only when nr_balanced_failed is sufficiently high do we
>> +	 * ignore this constraint.
>> +	 *
>> +	 * Threshold of cache_nice_tries is set to 1 higher
>> +	 * than nr_balance_failed to avoid excessive task
>> +	 * migration at the same time. Refer to comments around
>> +	 * llc_balance().
>> +	 */
>> +	if (env->sd->nr_balance_failed >= env->sd->cache_nice_tries + 1)
>> +		return false;
>> +
>> +	/*
>> +	 * We know the env->src_cpu has some tasks prefer to
>> +	 * run on env->dst_cpu, skip the tasks do not prefer
>> +	 * env->dst_cpu, and find the one that prefers.
>> +	 */
>> +	if (env->migration_type == migrate_llc_task &&
>> +	    task_llc(p) != llc_id(env->dst_cpu))
>> +		return true;
> 
> `task_llc(p)` returns the LLC id of the CPU the task is currently running on, right?
> Wouldn’t we need to check the task’s *preferred* LLC instead?
> 
> Am I missing something?
> 

No, you did not miss anything; this is indeed a bug.
I realized this during our discussion yesterday:
https://lore.kernel.org/all/22e975fd-4e31-498d-a016-2168721f532a@intel.com/
I will fix it accordingly. Thanks!

thanks,
Chenyu