[PATCH v2 14/23] sched/cache: Consider LLC preference when selecting tasks for load balancing

Posted by Tim Chen 2 weeks, 1 day ago
Currently, task selection from the busiest runqueue ignores LLC
preferences. Reorder tasks in the busiest queue to prioritize selection
as follows:

  1. Tasks preferring the destination CPU's LLC
  2. Tasks with no LLC preference
  3. Tasks preferring an LLC different from their current one
  4. Tasks preferring the LLC they are currently on

This improves the likelihood that tasks are migrated to their
preferred LLC.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---

Notes:
    v1->v2: No change.

 kernel/sched/fair.c | 66 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aed3fab98d7c..dd09a816670e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10092,6 +10092,68 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 	return NULL;
 }
 
+#ifdef CONFIG_SCHED_CACHE
+/*
+ * Prepare lists to detach tasks in the following order:
+ * 1. tasks that prefer dst cpu's LLC
+ * 2. tasks that have no LLC preference
+ * 3. tasks that prefer an LLC other than the one they are on
+ * 4. tasks that prefer the LLC they are currently on
+ */
+static struct list_head
+*order_tasks_by_llc(struct lb_env *env, struct list_head *tasks)
+{
+	struct task_struct *p;
+	LIST_HEAD(pref_old_llc);
+	LIST_HEAD(pref_new_llc);
+	LIST_HEAD(no_pref_llc);
+	LIST_HEAD(pref_other_llc);
+
+	if (!sched_cache_enabled())
+		return tasks;
+
+	if (cpus_share_cache(env->dst_cpu, env->src_cpu))
+		return tasks;
+
+	while (!list_empty(tasks)) {
+		p = list_last_entry(tasks, struct task_struct, se.group_node);
+
+		if (p->preferred_llc == llc_id(env->dst_cpu)) {
+			list_move(&p->se.group_node, &pref_new_llc);
+			continue;
+		}
+
+		if (p->preferred_llc == llc_id(env->src_cpu)) {
+			list_move(&p->se.group_node, &pref_old_llc);
+			continue;
+		}
+
+		if (p->preferred_llc == -1) {
+			list_move(&p->se.group_node, &no_pref_llc);
+			continue;
+		}
+
+		list_move(&p->se.group_node, &pref_other_llc);
+	}
+
+	/*
+	 * We detach tasks from the list tail in detach_tasks().  Put the
+	 * tasks to be chosen first at the end of the list.
+	 */
+	list_splice(&pref_new_llc, tasks);
+	list_splice(&no_pref_llc, tasks);
+	list_splice(&pref_other_llc, tasks);
+	list_splice(&pref_old_llc, tasks);
+	return tasks;
+}
+#else
+static inline struct list_head
+*order_tasks_by_llc(struct lb_env *env, struct list_head *tasks)
+{
+	return tasks;
+}
+#endif
+
 /*
  * detach_tasks() -- tries to detach up to imbalance load/util/tasks from
  * busiest_rq, as part of a balancing operation within domain "sd".
@@ -10100,7 +10162,7 @@ static struct task_struct *detach_one_task(struct lb_env *env)
  */
 static int detach_tasks(struct lb_env *env)
 {
-	struct list_head *tasks = &env->src_rq->cfs_tasks;
+	struct list_head *tasks;
 	unsigned long util, load;
 	struct task_struct *p;
 	int detached = 0;
@@ -10119,6 +10181,8 @@ static int detach_tasks(struct lb_env *env)
 	if (env->imbalance <= 0)
 		return 0;
 
+	tasks = order_tasks_by_llc(env, &env->src_rq->cfs_tasks);
+
 	while (!list_empty(tasks)) {
 		/*
 		 * We don't want to steal all, otherwise we may be treated likewise,
-- 
2.32.0
Re: [PATCH v2 14/23] sched/cache: Consider LLC preference when selecting tasks for load balancing
Posted by Peter Zijlstra 1 week, 2 days ago
On Wed, Dec 03, 2025 at 03:07:33PM -0800, Tim Chen wrote:
> Currently, task selection from the busiest runqueue ignores LLC
> preferences. Reorder tasks in the busiest queue to prioritize selection
> as follows:
> 
>   1. Tasks preferring the destination CPU's LLC
>   2. Tasks with no LLC preference
>   3. Tasks preferring an LLC different from their current one
>   4. Tasks preferring the LLC they are currently on
> 
> This improves the likelihood that tasks are migrated to their
> preferred LLC.
> 
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> ---
> 
> Notes:
>     v1->v2: No change.
> 
>  kernel/sched/fair.c | 66 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 65 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index aed3fab98d7c..dd09a816670e 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -10092,6 +10092,68 @@ static struct task_struct *detach_one_task(struct lb_env *env)
>  	return NULL;
>  }
>  
> +#ifdef CONFIG_SCHED_CACHE
> +/*
> + * Prepare lists to detach tasks in the following order:
> + * 1. tasks that prefer dst cpu's LLC
> + * 2. tasks that have no LLC preference
> + * 3. tasks that prefer an LLC other than the one they are on
> + * 4. tasks that prefer the LLC they are currently on
> + */
> +static struct list_head
> +*order_tasks_by_llc(struct lb_env *env, struct list_head *tasks)
> +{
> +	struct task_struct *p;
> +	LIST_HEAD(pref_old_llc);
> +	LIST_HEAD(pref_new_llc);
> +	LIST_HEAD(no_pref_llc);
> +	LIST_HEAD(pref_other_llc);
> +
> +	if (!sched_cache_enabled())
> +		return tasks;
> +
> +	if (cpus_share_cache(env->dst_cpu, env->src_cpu))
> +		return tasks;
> +
> +	while (!list_empty(tasks)) {
> +		p = list_last_entry(tasks, struct task_struct, se.group_node);
> +
> +		if (p->preferred_llc == llc_id(env->dst_cpu)) {
> +			list_move(&p->se.group_node, &pref_new_llc);
> +			continue;
> +		}
> +
> +		if (p->preferred_llc == llc_id(env->src_cpu)) {
> +			list_move(&p->se.group_node, &pref_old_llc);
> +			continue;
> +		}
> +
> +		if (p->preferred_llc == -1) {
> +			list_move(&p->se.group_node, &no_pref_llc);
> +			continue;
> +		}
> +
> +		list_move(&p->se.group_node, &pref_other_llc);
> +	}
> +
> +	/*
> +	 * We detach tasks from the list tail in detach_tasks().  Put the
> +	 * tasks to be chosen first at the end of the list.
> +	 */
> +	list_splice(&pref_new_llc, tasks);
> +	list_splice(&no_pref_llc, tasks);
> +	list_splice(&pref_other_llc, tasks);
> +	list_splice(&pref_old_llc, tasks);
> +	return tasks;
> +}

> @@ -10119,6 +10181,8 @@ static int detach_tasks(struct lb_env *env)
>  	if (env->imbalance <= 0)
>  		return 0;
>  
> +	tasks = order_tasks_by_llc(env, &env->src_rq->cfs_tasks);
> +
>  	while (!list_empty(tasks)) {
>  		/*
>  		 * We don't want to steal all, otherwise we may be treated likewise,

Humrph. So NUMA balancing does this differently. It skips over the tasks
that would degrade locality in can_migrate_task(); and only if
nr_balance_failed is high enough do we ignore that.
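
For reference, that check in can_migrate_task() looks roughly like the
following (paraphrased sketch; the exact shape differs between kernel
versions):

	/*
	 * NUMA-balancing check in can_migrate_task(): skip tasks whose
	 * migration would degrade NUMA locality, unless this domain has
	 * already failed to balance often enough.
	 */
	degrades = migrate_degrades_locality(p, env);
	if (!degrades)
		hot = task_hot(p, env);
	else
		hot = degrades > 0;

	if (!hot || env->sd->nr_balance_failed > env->sd->cache_nice_tries)
		return 1;	/* migrate; forced if the task is still hot */

	return 0;		/* skip this task for now */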

Also, if there are a significant number of tasks on the list, this gets
in the way of things like loop_break, since it does this sorting of the
whole list unconditionally.
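
For context, the detach_tasks() scan is already bounded per pass, roughly
(abbreviated sketch of the existing loop):

	while (!list_empty(tasks)) {
		...
		env->loop++;
		/* We've more or less seen every task there is, call it quits */
		if (env->loop > env->loop_max)
			break;

		/* take a breather every nr_migrate tasks */
		if (env->loop > env->loop_break) {
			env->loop_break += SCHED_NR_MIGRATE_BREAK;
			env->flags |= LBF_NEED_BREAK;
			break;
		}
		...
	}

whereas order_tasks_by_llc() walks the whole cfs_tasks list before any of
these limits apply.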

Bah, this feels like there is a sane way to integrate all this, but it
seems to escape me at the moment. I'll ponder it a bit more.