EAS relies on wakeup events to efficiently place tasks on the system, but
there are cases where a task no longer gets wakeup events, or gets them at
far too low a pace. For such situations, we can take advantage of the task
being put back in the enqueued list to check whether it should be pushed to
another CPU. When the task is alone on the CPU, it is never put back in the
enqueued list; in this special case, we use the tick to run the check.

Wakeup events remain the main way to migrate tasks, but we now detect
situations where a task is stuck on a CPU by checking that its utilization
is larger than the max available compute capacity (max CPU capacity or
uclamp max setting).
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
kernel/sched/fair.c | 220 +++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 2 +
2 files changed, 222 insertions(+)
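As a quick illustration of the "stuck task" condition described above, here
is a small standalone sketch (not part of the patch; the helper names are
made up, and it only mirrors the test that task_stuck_on_cpu() below
performs, with made-up numbers on the usual 0..1024 capacity scale):

/* stuck_task_demo.c: illustration only, not part of the patch. */
#include <stdbool.h>
#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

/* A task is a push candidate when even its clamped demand exceeds what the
 * CPU can offer: it may never sleep and so never gets a wakeup-time
 * placement by feec(). */
static bool task_looks_stuck(unsigned long task_util, unsigned long task_runnable,
			     unsigned long cpu_capacity, unsigned long uclamp_max)
{
	unsigned long max_capa = min_ul(cpu_capacity, uclamp_max);
	unsigned long util = max_ul(task_util, task_runnable);

	return util > max_capa;
}

int main(void)
{
	/* ~430-util task on a little CPU whose current capacity is 160 */
	printf("%d\n", task_looks_stuck(400, 430, 160, 1024));	/* 1: push candidate */
	/* ~120-util task on the same CPU: it fits, wakeups handle placement */
	printf("%d\n", task_looks_stuck(100, 120, 160, 1024));	/* 0 */
	return 0;
}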
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a9b97bbc085f..c3e383b86808 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7051,6 +7051,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
hrtick_update(rq);
}
+static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
static void set_next_buddy(struct sched_entity *se);
/*
@@ -7081,6 +7082,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
h_nr_idle = task_has_idle_policy(p);
if (task_sleep || task_delayed || !se->sched_delayed)
h_nr_runnable = 1;
+
+ fair_remove_pushable_task(rq, p);
} else {
cfs_rq = group_cfs_rq(se);
slice = cfs_rq_min_slice(cfs_rq);
@@ -8589,6 +8592,197 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
return target;
}
+static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
+{
+ unsigned long max_capa, util;
+
+ max_capa = min(get_actual_cpu_capacity(cpu),
+ uclamp_eff_value(p, UCLAMP_MAX));
+ util = max(task_util_est(p), task_runnable(p));
+
+ /*
+ * Return true only if the task might not sleep/wake up because of a low
+ * compute capacity. Tasks which wake up regularly will be handled by
+ * feec().
+ */
+ return (util > max_capa);
+}
+
+static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
+{
+ if (p->nr_cpus_allowed == 1)
+ return false;
+
+ if (is_rd_overutilized(rq->rd))
+ return false;
+
+ if (task_stuck_on_cpu(p, cpu_of(rq)))
+ return true;
+
+ return false;
+}
+
+static int active_load_balance_cpu_stop(void *data);
+
+static inline void check_pushable_task(struct task_struct *p, struct rq *rq)
+{
+ int new_cpu, cpu = cpu_of(rq);
+
+ if (!sched_energy_enabled())
+ return;
+
+ if (WARN_ON(!p))
+ return;
+
+ if (WARN_ON(!task_current(rq, p)))
+ return;
+
+ if (is_migration_disabled(p))
+ return;
+
+ /* If there are several tasks, wait for the task to be put back */
+ if (rq->nr_running > 1)
+ return;
+
+ if (!sched_energy_push_task(p, rq))
+ return;
+
+ new_cpu = find_energy_efficient_cpu(p, cpu);
+
+ if (new_cpu == cpu)
+ return;
+
+ /*
+ * ->active_balance synchronizes accesses to
+ * ->active_balance_work. Once set, it's cleared
+ * only after active load balance is finished.
+ */
+ if (!rq->active_balance) {
+ rq->active_balance = 1;
+ rq->push_cpu = new_cpu;
+ } else
+ return;
+
+ raw_spin_rq_unlock(rq);
+ stop_one_cpu_nowait(cpu,
+ active_load_balance_cpu_stop, rq,
+ &rq->active_balance_work);
+ raw_spin_rq_lock(rq);
+}
+
+static inline int has_pushable_tasks(struct rq *rq)
+{
+ return !plist_head_empty(&rq->cfs.pushable_tasks);
+}
+
+static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
+{
+ struct task_struct *p;
+
+ if (!has_pushable_tasks(rq))
+ return NULL;
+
+ p = plist_first_entry(&rq->cfs.pushable_tasks,
+ struct task_struct, pushable_tasks);
+
+ WARN_ON_ONCE(rq->cpu != task_cpu(p));
+ WARN_ON_ONCE(task_current(rq, p));
+ WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
+ WARN_ON_ONCE(!task_on_rq_queued(p));
+
+ /*
+ * Remove the task from the pushable list as we only try once after the
+ * task has been put back in the enqueued list.
+ */
+ plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+
+ return p;
+}
+
+/*
+ * See if the non-running fair tasks on this rq can be sent to other CPUs
+ * that fit better with their profile.
+ */
+static bool push_fair_task(struct rq *rq)
+{
+ struct task_struct *next_task;
+ int prev_cpu, new_cpu;
+ struct rq *new_rq;
+
+ next_task = pick_next_pushable_fair_task(rq);
+ if (!next_task)
+ return false;
+
+ if (is_migration_disabled(next_task))
+ return true;
+
+ /* We might release rq lock */
+ get_task_struct(next_task);
+
+ prev_cpu = rq->cpu;
+
+ new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
+
+ if (new_cpu == prev_cpu)
+ goto out;
+
+ new_rq = cpu_rq(new_cpu);
+
+ if (double_lock_balance(rq, new_rq)) {
+ /* The task has already migrated in between */
+ if (task_cpu(next_task) != rq->cpu) {
+ double_unlock_balance(rq, new_rq);
+ goto out;
+ }
+
+ deactivate_task(rq, next_task, 0);
+ set_task_cpu(next_task, new_cpu);
+ activate_task(new_rq, next_task, 0);
+
+ resched_curr(new_rq);
+
+ double_unlock_balance(rq, new_rq);
+ }
+
+out:
+ put_task_struct(next_task);
+
+ return true;
+}
+
+static void push_fair_tasks(struct rq *rq)
+{
+ /* push_fair_task() will return true if it moved a fair task */
+ while (push_fair_task(rq))
+ ;
+}
+
+static DEFINE_PER_CPU(struct balance_callback, fair_push_head);
+
+static inline void fair_queue_pushable_tasks(struct rq *rq)
+{
+ if (!sched_energy_enabled() || !has_pushable_tasks(rq))
+ return;
+
+ queue_balance_callback(rq, &per_cpu(fair_push_head, rq->cpu), push_fair_tasks);
+}
+static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p)
+{
+ if (sched_energy_enabled())
+ plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+}
+
+static void fair_add_pushable_task(struct rq *rq, struct task_struct *p)
+{
+ if (sched_energy_enabled() && task_on_rq_queued(p) && !p->se.sched_delayed) {
+ if (sched_energy_push_task(p, rq)) {
+ plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+ plist_node_init(&p->pushable_tasks, p->prio);
+ plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+ }
+ }
+}
+
/*
* select_task_rq_fair: Select target runqueue for the waking task in domains
* that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
@@ -8758,6 +8952,10 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
return sched_balance_newidle(rq, rf) != 0;
}
#else
+static inline void check_pushable_task(struct task_struct *p, struct rq *rq) {}
+static inline void fair_queue_pushable_tasks(struct rq *rq) {}
+static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p) {}
+static inline void fair_add_pushable_task(struct rq *rq, struct task_struct *p) {}
static inline void set_task_max_allowed_capacity(struct task_struct *p) {}
#endif /* CONFIG_SMP */
@@ -8947,6 +9145,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
put_prev_entity(cfs_rq, pse);
set_next_entity(cfs_rq, se);
+ /*
+ * The previous task might be eligible for being pushed on
+ * another cpu if it is still active.
+ */
+ fair_add_pushable_task(rq, prev);
+
__set_next_task_fair(rq, p, true);
}
@@ -9019,6 +9223,13 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct t
cfs_rq = cfs_rq_of(se);
put_prev_entity(cfs_rq, se);
}
+
+ /*
+ * The previous task might be eligible for being pushed on another cpu
+ * if it is still active.
+ */
+ fair_add_pushable_task(rq, prev);
+
}
/*
@@ -13151,6 +13362,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
+ check_pushable_task(curr, rq);
update_misfit_status(curr, rq);
check_update_overutilized_status(task_rq(curr));
@@ -13303,6 +13515,8 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
{
struct sched_entity *se = &p->se;
+ fair_remove_pushable_task(rq, p);
+
#ifdef CONFIG_SMP
if (task_on_rq_queued(p)) {
/*
@@ -13320,6 +13534,11 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
if (hrtick_enabled_fair(rq))
hrtick_start_fair(rq, p);
+ /*
+ * Try to push the prev task before checking misfit for the next task, as
+ * the migration of prev can make next fit the CPU.
+ */
+ fair_queue_pushable_tasks(rq);
update_misfit_status(p, rq);
sched_fair_update_stop_tick(rq, p);
}
@@ -13350,6 +13569,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
cfs_rq->tasks_timeline = RB_ROOT_CACHED;
cfs_rq->min_vruntime = (u64)(-(1LL << 20));
#ifdef CONFIG_SMP
+ plist_head_init(&cfs_rq->pushable_tasks);
raw_spin_lock_init(&cfs_rq->removed.lock);
#endif
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ab16d3d0e51c..2db198dccf21 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -722,6 +722,8 @@ struct cfs_rq {
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
+ struct plist_head pushable_tasks;
+
/* Locally cached copy of our task_group's idle value */
int idle;
--
2.43.0
Hi Vincent,
On Mon, Mar 3, 2025 at 5:06 AM Vincent Guittot
<vincent.guittot@linaro.org> wrote:
[snip]
> +static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
> +{
> + unsigned long max_capa, util;
> +
> + max_capa = min(get_actual_cpu_capacity(cpu),
> + uclamp_eff_value(p, UCLAMP_MAX));
> + util = max(task_util_est(p), task_runnable(p));
> +
> + /*
> + * Return true only if the task might not sleep/wakeup because of a low
> + * compute capacity. Tasks, which wake up regularly, will be handled by
> + * feec().
> + */
I am carefully studying this series of patches, and I have some doubts
about this part.
Do we need to check the task state here, i.e.
READ_ONCE(p->__state) != TASK_RUNNING;
since the tick will run this check?
On the other hand, do we need to check sched_delayed?
It is already checked in put_prev_task_fair() (via fair_add_pushable_task()).
Thanks!
> + return (util > max_capa);
> +}
[snip]
On Tue, 15 Apr 2025 at 04:31, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
>
> Hi Vincent,
>
> On Mon, Mar 3, 2025 at 5:06 AM Vincent Guittot
> <vincent.guittot@linaro.org> wrote:
[snip]
> > +static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
> > +{
> > + unsigned long max_capa, util;
> > +
> > + max_capa = min(get_actual_cpu_capacity(cpu),
> > + uclamp_eff_value(p, UCLAMP_MAX));
> > + util = max(task_util_est(p), task_runnable(p));
> > +
> > + /*
> > + * Return true only if the task might not sleep/wakeup because of a low
> > + * compute capacity. Tasks, which wake up regularly, will be handled by
> > + * feec().
> > + */
> I am carefully studying this series of patches. I have some doubts
> about this part.
>
> Need we check the state?
> READ_ONCE(p->__state) != TASK_RUNNING;
> Because the tick will check it.
>
> On the other hand, need we check the sched_delayed?
> Because it also checks it in put_prev_task_fair().
In the case of the tick, the task is the current task and the only runnable
task on the CPU.
>
> Thanks!
>
> > + return (util > max_capa);
> > +}
[snip]
Hi Vincent,
On Tue, Apr 15, 2025 at 9:51 PM Vincent Guittot
<vincent.guittot@linaro.org> wrote:
>
> > I am carefully studying this series of patches. I have some doubts
> > about this part.
> >
> > Need we check the state?
> > READ_ONCE(p->__state) != TASK_RUNNING;
> > Because the tick will check it.
> >
> > On the other hand, need we check the sched_delayed?
> > Because it also checks it in put_prev_task_fair().
>
> In the case of tick, the task is the current task and the only one running
>
If the following sequence occurs:
  set_current_state(TASK_INTERRUPTIBLE);
  schedule();
    __schedule();
      local_irq_disable();
and the tick occurs between set_current_state() and local_irq_disable(),
then maybe we do not need to migrate the task.
BR
---
xuewen
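For concreteness, a minimal sketch of the kind of early bail-out being
suggested, assuming it would sit near the top of check_pushable_task(); the
helper name is made up, and this only illustrates the idea rather than
being code from the patch:

/*
 * Hypothetical check: skip the tick-driven push when the current task has
 * already set a non-running state and is on its way into __schedule(), so
 * we don't start an active migration for a task that is about to sleep and
 * will get a normal wakeup-time placement by feec() anyway.
 */
static inline bool task_about_to_sleep(struct task_struct *p)
{
	return READ_ONCE(p->__state) != TASK_RUNNING;
}

	/* ...early in check_pushable_task(), before calling feec(): */
	if (task_about_to_sleep(p))
		return;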
On 3/2/25 21:05, Vincent Guittot wrote:
[snip]
> +/*
> + * See if the non running fair tasks on this rq can be sent on other CPUs
> + * that fits better with their profile.
> + */
> +static bool push_fair_task(struct rq *rq)
> +{
> + struct task_struct *next_task;
> + int prev_cpu, new_cpu;
> + struct rq *new_rq;
> +
> + next_task = pick_next_pushable_fair_task(rq);
> + if (!next_task)
> + return false;
> +
> + if (is_migration_disabled(next_task))
> + return true;
> +
> + /* We might release rq lock */
> + get_task_struct(next_task);
> +
> + prev_cpu = rq->cpu;
> +
> + new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
We aren't gating this on an overutilized check at either call site of this
patch, unlike the other feec() call, and testing shows that this calls
feec() relatively often when overutilized (OU).
Why would it be OK to call feec() here when it isn't on task placement?
> +
> + if (new_cpu == prev_cpu)
> + goto out;
> +
> + new_rq = cpu_rq(new_cpu);
> +
> + if (double_lock_balance(rq, new_rq)) {
> + /* The task has already migrated in between */
> + if (task_cpu(next_task) != rq->cpu) {
> + double_unlock_balance(rq, new_rq);
> + goto out;
> + }
> +
> + deactivate_task(rq, next_task, 0);
> + set_task_cpu(next_task, new_cpu);
> + activate_task(new_rq, next_task, 0);
> +
> + resched_curr(new_rq);
> +
> + double_unlock_balance(rq, new_rq);
> + }
> +
> +out:
> + put_task_struct(next_task);
> +
> + return true;
> +}
> +
> +static void push_fair_tasks(struct rq *rq)
> +{
> + /* push_fair_task() will return true if it moved a fair task */
A bit of a nit, but this isn't technically true: push_fair_task() will also
return true when the task it found wasn't movable.
[snip]
On Tue, 25 Mar 2025 at 12:16, Christian Loehle <christian.loehle@arm.com> wrote:
>
[snip]
> > + prev_cpu = rq->cpu;
> > +
> > + new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
>
> We aren't gating this on an overutilized check at either call site of this
> patch, unlike the other feec() call, and testing shows that this calls
> feec() relatively often when overutilized (OU).
> Why would it be OK to call feec() here when it isn't on task placement?

The overutilized check has been done when adding the task to the list.
[snip]
On 4/15/25 14:52, Vincent Guittot wrote:
> On Tue, 25 Mar 2025 at 12:16, Christian Loehle <christian.loehle@arm.com> wrote:
>>
[snip]
>>> + prev_cpu = rq->cpu;
>>> +
>>> + new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
>>
>> We aren't gating this on an overutilized check at either call site of this patch
>
> The overutilized check has been done when adding the task to the list.
>
Right, but that was earlier?
Shouldn't we just clear the list on OU, since load balancing is now active
again? (I do understand that this impacts the effectiveness here, but it
seems like the correct thing to do?)
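For concreteness, a rough sketch of what clearing the list on OU could look
like (hypothetical helper name, hook point deliberately left open, not code
from the patch; assumes the rq lock is held):

/*
 * Hypothetical: once the root domain is overutilized the regular load
 * balancer takes over, so pending push candidates could simply be dropped
 * from the per-rq list, e.g. at the point where the overutilized status is
 * set.
 */
static void fair_clear_pushable_tasks(struct rq *rq)
{
	struct task_struct *p, *tmp;

	plist_for_each_entry_safe(p, tmp, &rq->cfs.pushable_tasks, pushable_tasks)
		plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
}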
On 3/2/25 21:05, Vincent Guittot wrote:
> EAS is based on wakeup events to efficiently place tasks on the system, but
> there are cases where a task doesn't have wakeup events anymore or at a far
> too low pace. For such situation, we can take advantage of the task being
> put back in the enqueued list to check if it should be pushed on another
> CPU. When the task is alone on the CPU, it's never put back in the enqueued
> list; In this special case, we use the tick to run the check.
>
> Wake up events remain the main way to migrate tasks but we now detect
> situation where a task is stuck on a CPU by checking that its utilization
> is larger than the max available compute capacity (max cpu capacity or
> uclamp max setting)
>
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
So during OSPM25 there was a discussion around this saving energy (even for
!UCLAMP_MAX tasks), because currently, when a task suddenly has an increased
compute demand, we let it trigger the inefficient OPPs on little and mid
CPUs until it is misfit or the system is overutilized and CAS kicks in.
In particular, I presented a workload (an internal VideoScroller which loads
a new video every 3s) which performs worse on power with EAS than with CAS.
Ignoring overutilized while attempting feec() does help a bit (-5% energy
with CAS, -2% energy with feec() during OU). This push mechanism was also
mentioned as a way to mitigate such situations.
In theory I agree, but I'm afraid it doesn't help in my testing.
Throughout various workloads where the described issue would appear, the
push mechanism is only triggered around once every 2 minutes (i.e.
absolutely negligible).
In particular, within 1 hour of testing I have only seen 5 pushed tasks that
fit the described scenario ("going from inefficient OPPs on little/mid to
more efficient OPPs on the more capable CPUs"). The described scenario is
very common (triggering at least every few seconds during many workloads).
The vast majority of pushed tasks were pushed within a cluster.
This was on a Pixel 6.
[snip]
On 02/03/25 22:05, Vincent Guittot wrote:
> +static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
> +{
> + struct task_struct *p;
> +
> + if (!has_pushable_tasks(rq))
> + return NULL;
> +
> + p = plist_first_entry(&rq->cfs.pushable_tasks,
> + struct task_struct, pushable_tasks);
> +
> + WARN_ON_ONCE(rq->cpu != task_cpu(p));
> + WARN_ON_ONCE(task_current(rq, p));
> + WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
> + WARN_ON_ONCE(!task_on_rq_queued(p));
> +
> + /*
> + * Remove task from the pushable list as we try only once after that
> + * the task has been put back in enqueued list.
> + */
> + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> +
> + return p;
I've only had a look at this patch per the OSPM newidle balance discussion;
coupled with something like RT/DL's overload cpumask, this could be a
viable newidle_balance() replacement.
Unfortunately this means we now have a third copy of the push mechanism
along with RT and DL, so a third place to manually patch whenever a bug is
fixed in one of them [1].
We could perhaps have a skeleton of the pushable list handling in
{enqueue,dequeue}_task() and put_prev_task(), with class-specific conditions
and backing storage (plist vs rbtree) handled via class callbacks.
Or even make the whole pushable enqueue/dequeue its own class callback,
which would simplify [2].
[1]: http://lore.kernel.org/r/20250304103001.0f89e953@gandalf.local.home
[2]: https://lore.kernel.org/lkml/20250312221147.1865364-7-jstultz@google.com/
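To illustrate, a purely hypothetical sketch of such a skeleton; the callback
names and the push_ops hookup are invented here for illustration, not taken
from this patch or from mainline:

	struct sched_class_push_ops {
		/* class-specific policy: should @p be tracked as pushable? */
		bool (*task_is_pushable)(struct rq *rq, struct task_struct *p);
		/* class-specific backing storage: plist for RT/CFS, rbtree for DL */
		void (*enqueue_pushable)(struct rq *rq, struct task_struct *p);
		void (*dequeue_pushable)(struct rq *rq, struct task_struct *p);
	};

	/* common skeleton called from put_prev_task()/dequeue_task() */
	static void generic_track_pushable(struct rq *rq, struct task_struct *p)
	{
		/* push_ops is an assumed, not existing, sched_class member */
		const struct sched_class_push_ops *ops = p->sched_class->push_ops;

		if (ops && ops->task_is_pushable(rq, p))
			ops->enqueue_pushable(rq, p);
	}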
Hi Vincent, I'm trying to understand this series. I see most of the places
are guarded by sched_energy_enabled() checks, so it shouldn't affect non-EAS
systems.
> EAS is based on wakeup events to efficiently place tasks on the system, but
> there are cases where a task doesn't have wakeup events anymore or at a far
> too low pace. For such situation, we can take advantage of the task being
> put back in the enqueued list to check if it should be pushed on another
> CPU. When the task is alone on the CPU, it's never put back in the enqueued
> list; In this special case, we use the tick to run the check.
>
> Wake up events remain the main way to migrate tasks but we now detect
> situation where a task is stuck on a CPU by checking that its utilization
> is larger than the max available compute capacity (max cpu capacity or
> uclamp max setting)
>
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
> kernel/sched/fair.c | 220 +++++++++++++++++++++++++++++++++++++++++++
> kernel/sched/sched.h | 2 +
> 2 files changed, 222 insertions(+)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index a9b97bbc085f..c3e383b86808 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7051,6 +7051,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> hrtick_update(rq);
> }
>
> +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
> static void set_next_buddy(struct sched_entity *se);
>
> /*
> @@ -7081,6 +7082,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> h_nr_idle = task_has_idle_policy(p);
> if (task_sleep || task_delayed || !se->sched_delayed)
> h_nr_runnable = 1;
> +
> + fair_remove_pushable_task(rq, p);
> } else {
> cfs_rq = group_cfs_rq(se);
> slice = cfs_rq_min_slice(cfs_rq);
> @@ -8589,6 +8592,197 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> return target;
> }
>
> +static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
> +{
> + unsigned long max_capa, util;
> +
> + max_capa = min(get_actual_cpu_capacity(cpu),
> + uclamp_eff_value(p, UCLAMP_MAX));
> + util = max(task_util_est(p), task_runnable(p));
> +
> + /*
> + * Return true only if the task might not sleep/wakeup because of a low
> + * compute capacity. Tasks, which wake up regularly, will be handled by
> + * feec().
> + */
> + return (util > max_capa);
> +}
> +
> +static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
> +{
> + if (p->nr_cpus_allowed == 1)
> + return false;
> +
> + if (is_rd_overutilized(rq->rd))
> + return false;
> +
> + if (task_stuck_on_cpu(p, cpu_of(rq)))
> + return true;
> +
> + return false;
> +}
> +
> +static int active_load_balance_cpu_stop(void *data);
> +
> +static inline void check_pushable_task(struct task_struct *p, struct rq *rq)
> +{
> + int new_cpu, cpu = cpu_of(rq);
> +
> + if (!sched_energy_enabled())
> + return;
> +
> + if (WARN_ON(!p))
> + return;
> +
> + if (WARN_ON(!task_current(rq, p)))
> + return;
> +
> + if (is_migration_disabled(p))
> + return;
> +
> + /* If there are several task, wait for being put back */
> + if (rq->nr_running > 1)
> + return;
> +
> + if (!sched_energy_push_task(p, rq))
> + return;
> +
> + new_cpu = find_energy_efficient_cpu(p, cpu);
> +
> + if (new_cpu == cpu)
> + return;
> +
> + /*
> + * ->active_balance synchronizes accesses to
> + * ->active_balance_work. Once set, it's cleared
> + * only after active load balance is finished.
> + */
> + if (!rq->active_balance) {
> + rq->active_balance = 1;
> + rq->push_cpu = new_cpu;
> + } else
> + return;
> +
Does this need preempt disable/enable guards similar to sched_balance_rq?
> + raw_spin_rq_unlock(rq);
> + stop_one_cpu_nowait(cpu,
> + active_load_balance_cpu_stop, rq,
> + &rq->active_balance_work);
> + raw_spin_rq_lock(rq);
> +}
> +
> +static inline int has_pushable_tasks(struct rq *rq)
> +{
> + return !plist_head_empty(&rq->cfs.pushable_tasks);
> +}
> +
> +static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
> +{
> + struct task_struct *p;
> +
> + if (!has_pushable_tasks(rq))
> + return NULL;
> +
> + p = plist_first_entry(&rq->cfs.pushable_tasks,
> + struct task_struct, pushable_tasks);
> +
> + WARN_ON_ONCE(rq->cpu != task_cpu(p));
> + WARN_ON_ONCE(task_current(rq, p));
> + WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
> + WARN_ON_ONCE(!task_on_rq_queued(p));
> +
Isn't it better to print it every time? It could be a different process
each time, no?
> + /*
> + * Remove task from the pushable list as we try only once after that
> + * the task has been put back in enqueued list.
> + */
> + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> +
> + return p;
> +}
> +
> +/*
> + * See if the non running fair tasks on this rq can be sent on other CPUs
> + * that fits better with their profile.
> + */
> +static bool push_fair_task(struct rq *rq)
> +{
> + struct task_struct *next_task;
> + int prev_cpu, new_cpu;
> + struct rq *new_rq;
> +
> + next_task = pick_next_pushable_fair_task(rq);
> + if (!next_task)
> + return false;
> +
> + if (is_migration_disabled(next_task))
> + return true;
> +
> + /* We might release rq lock */
> + get_task_struct(next_task);
> +
> + prev_cpu = rq->cpu;
> +
> + new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
> +
> + if (new_cpu == prev_cpu)
> + goto out;
> +
> + new_rq = cpu_rq(new_cpu);
> +
> + if (double_lock_balance(rq, new_rq)) {
> + /* The task has already migrated in between */
> + if (task_cpu(next_task) != rq->cpu) {
> + double_unlock_balance(rq, new_rq);
> + goto out;
> + }
> +
> + deactivate_task(rq, next_task, 0);
> + set_task_cpu(next_task, new_cpu);
> + activate_task(new_rq, next_task, 0);
> +
> + resched_curr(new_rq);
> +
> + double_unlock_balance(rq, new_rq);
> + }
> +
> +out:
> + put_task_struct(next_task);
> +
> + return true;
> +}
> +
> +static void push_fair_tasks(struct rq *rq)
> +{
> + /* push_fair_task() will return true if it moved a fair task */
> + while (push_fair_task(rq))
> + ;
> +}
> +
> +static DEFINE_PER_CPU(struct balance_callback, fair_push_head);
> +
> +static inline void fair_queue_pushable_tasks(struct rq *rq)
> +{
> + if (!sched_energy_enabled() || !has_pushable_tasks(rq))
> + return;
has_pushable_tasks() can only return true if sched_energy_enabled() is set,
so this check may not be needed. But it shouldn't hurt, since it is a
static key.
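For reference, sched_energy_enabled() is indeed a static-key test, so the
extra check is essentially free when EAS is off; it is defined in
kernel/sched/sched.h roughly as:

	DECLARE_STATIC_KEY_FALSE(sched_energy_present);

	static inline bool sched_energy_enabled(void)
	{
		return static_branch_unlikely(&sched_energy_present);
	}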
> +
> + queue_balance_callback(rq, &per_cpu(fair_push_head, rq->cpu), push_fair_tasks);
> +}
> +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p)
> +{
> + if (sched_energy_enabled())
> + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> +}
> +
> +static void fair_add_pushable_task(struct rq *rq, struct task_struct *p)
> +{
> + if (sched_energy_enabled() && task_on_rq_queued(p) && !p->se.sched_delayed) {
> + if (sched_energy_push_task(p, rq)) {
> + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> + plist_node_init(&p->pushable_tasks, p->prio);
> + plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> + }
> + }
> +}
> +
> /*
> * select_task_rq_fair: Select target runqueue for the waking task in domains
> * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
> @@ -8758,6 +8952,10 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> return sched_balance_newidle(rq, rf) != 0;
> }
> #else
> +static inline void check_pushable_task(struct task_struct *p, struct rq *rq) {}
> +static inline void fair_queue_pushable_tasks(struct rq *rq) {}
> +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p) {}
> +static inline void fair_add_pushable_task(struct rq *rq, struct task_struct *p) {}
> static inline void set_task_max_allowed_capacity(struct task_struct *p) {}
> #endif /* CONFIG_SMP */
>
> @@ -8947,6 +9145,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
> put_prev_entity(cfs_rq, pse);
> set_next_entity(cfs_rq, se);
>
> + /*
> + * The previous task might be eligible for being pushed on
> + * another cpu if it is still active.
> + */
> + fair_add_pushable_task(rq, prev);
> +
> __set_next_task_fair(rq, p, true);
> }
>
> @@ -9019,6 +9223,13 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct t
> cfs_rq = cfs_rq_of(se);
> put_prev_entity(cfs_rq, se);
> }
> +
> + /*
> + * The previous task might be eligible for being pushed on another cpu
> + * if it is still active.
> + */
> + fair_add_pushable_task(rq, prev);
> +
> }
>
> /*
> @@ -13151,6 +13362,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
> if (static_branch_unlikely(&sched_numa_balancing))
> task_tick_numa(rq, curr);
>
> + check_pushable_task(curr, rq);
> update_misfit_status(curr, rq);
> check_update_overutilized_status(task_rq(curr));
>
> @@ -13303,6 +13515,8 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
> {
> struct sched_entity *se = &p->se;
>
> + fair_remove_pushable_task(rq, p);
> +
> #ifdef CONFIG_SMP
> if (task_on_rq_queued(p)) {
> /*
> @@ -13320,6 +13534,11 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
> if (hrtick_enabled_fair(rq))
> hrtick_start_fair(rq, p);
>
> + /*
> + * Try to push prev task before checking misfit for next task as
> + * the migration of prev can make next fitting the CPU
> + */
> + fair_queue_pushable_tasks(rq);
> update_misfit_status(p, rq);
> sched_fair_update_stop_tick(rq, p);
> }
> @@ -13350,6 +13569,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
> cfs_rq->tasks_timeline = RB_ROOT_CACHED;
> cfs_rq->min_vruntime = (u64)(-(1LL << 20));
> #ifdef CONFIG_SMP
> + plist_head_init(&cfs_rq->pushable_tasks);
> raw_spin_lock_init(&cfs_rq->removed.lock);
> #endif
> }
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index ab16d3d0e51c..2db198dccf21 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -722,6 +722,8 @@ struct cfs_rq {
> struct list_head leaf_cfs_rq_list;
> struct task_group *tg; /* group that "owns" this runqueue */
>
> + struct plist_head pushable_tasks;
> +
> /* Locally cached copy of our task_group's idle value */
> int idle;
>
Hi Shrikanth,
On Mon, 10 Mar 2025 at 19:21, Shrikanth Hegde <sshegde@linux.ibm.com> wrote:
>
>
> Hi Vincent, I'm trying to understand this series. I see most of the places
> are guarded by sched_energy_enabled() checks, so it shouldn't affect non-EAS
> systems.
>
> > EAS is based on wakeup events to efficiently place tasks on the system, but
> > there are cases where a task doesn't have wakeup events anymore or at a far
> > too low pace. For such situation, we can take advantage of the task being
> > put back in the enqueued list to check if it should be pushed on another
> > CPU. When the task is alone on the CPU, it's never put back in the enqueued
> > list; In this special case, we use the tick to run the check.
> >
> > Wake up events remain the main way to migrate tasks but we now detect
> > situation where a task is stuck on a CPU by checking that its utilization
> > is larger than the max available compute capacity (max cpu capacity or
> > uclamp max setting)
> >
> > Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> > ---
> > kernel/sched/fair.c | 220 +++++++++++++++++++++++++++++++++++++++++++
> > kernel/sched/sched.h | 2 +
> > 2 files changed, 222 insertions(+)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index a9b97bbc085f..c3e383b86808 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -7051,6 +7051,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > hrtick_update(rq);
> > }
> >
> > +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
> > static void set_next_buddy(struct sched_entity *se);
> >
> > /*
> > @@ -7081,6 +7082,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> > h_nr_idle = task_has_idle_policy(p);
> > if (task_sleep || task_delayed || !se->sched_delayed)
> > h_nr_runnable = 1;
> > +
> > + fair_remove_pushable_task(rq, p);
> > } else {
> > cfs_rq = group_cfs_rq(se);
> > slice = cfs_rq_min_slice(cfs_rq);
> > @@ -8589,6 +8592,197 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > return target;
> > }
> >
> > +static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
> > +{
> > + unsigned long max_capa, util;
> > +
> > + max_capa = min(get_actual_cpu_capacity(cpu),
> > + uclamp_eff_value(p, UCLAMP_MAX));
> > + util = max(task_util_est(p), task_runnable(p));
> > +
> > + /*
> > + * Return true only if the task might not sleep/wakeup because of a low
> > + * compute capacity. Tasks, which wake up regularly, will be handled by
> > + * feec().
> > + */
> > + return (util > max_capa);
> > +}
> > +
> > +static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
> > +{
> > + if (p->nr_cpus_allowed == 1)
> > + return false;
> > +
> > + if (is_rd_overutilized(rq->rd))
> > + return false;
> > +
> > + if (task_stuck_on_cpu(p, cpu_of(rq)))
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +static int active_load_balance_cpu_stop(void *data);
> > +
> > +static inline void check_pushable_task(struct task_struct *p, struct rq *rq)
> > +{
> > + int new_cpu, cpu = cpu_of(rq);
> > +
> > + if (!sched_energy_enabled())
> > + return;
> > +
> > + if (WARN_ON(!p))
> > + return;
> > +
> > + if (WARN_ON(!task_current(rq, p)))
> > + return;
> > +
> > + if (is_migration_disabled(p))
> > + return;
> > +
> > + /* If there are several task, wait for being put back */
> > + if (rq->nr_running > 1)
> > + return;
> > +
> > + if (!sched_energy_push_task(p, rq))
> > + return;
> > +
> > + new_cpu = find_energy_efficient_cpu(p, cpu);
> > +
> > + if (new_cpu == cpu)
> > + return;
> > +
> > + /*
> > + * ->active_balance synchronizes accesses to
> > + * ->active_balance_work. Once set, it's cleared
> > + * only after active load balance is finished.
> > + */
> > + if (!rq->active_balance) {
> > + rq->active_balance = 1;
> > + rq->push_cpu = new_cpu;
> > + } else
> > + return;
> > +
>
> Does this need preempt disable/enable guards similar to sched_balance_rq?
Pierre asked me about this in the RFC version [1]. Preempt
disable/enable was added by commit f0498d2a54e7 ("sched: Fix
stop_one_cpu_nowait() vs hotplug"), and AFAIK we are safe with the use
case mentioned in that commit.
[1] https://lore.kernel.org/lkml/ccf4095f-5fca-42f4-b9fe-aa93e703016e@arm.com/
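For context, the pattern that commit added around the active balance in
sched_balance_rq() looks roughly like this (paraphrased as a sketch, not
copied from the tree):

	preempt_disable();
	raw_spin_rq_unlock_irqrestore(busiest, flags);
	if (active_balance) {
		stop_one_cpu_nowait(cpu_of(busiest),
				    active_load_balance_cpu_stop, busiest,
				    &busiest->active_balance_work);
	}
	preempt_enable();

i.e. the caller stays non-preemptible from dropping the busiest rq lock
until the stopper work is queued, to avoid racing with CPU hotplug.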
>
> > + raw_spin_rq_unlock(rq);
> > + stop_one_cpu_nowait(cpu,
> > + active_load_balance_cpu_stop, rq,
> > + &rq->active_balance_work);
> > + raw_spin_rq_lock(rq);
> > +}
> > +
> > +static inline int has_pushable_tasks(struct rq *rq)
> > +{
> > + return !plist_head_empty(&rq->cfs.pushable_tasks);
> > +}
> > +
> > +static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
> > +{
> > + struct task_struct *p;
> > +
> > + if (!has_pushable_tasks(rq))
> > + return NULL;
> > +
> > + p = plist_first_entry(&rq->cfs.pushable_tasks,
> > + struct task_struct, pushable_tasks);
> > +
> > + WARN_ON_ONCE(rq->cpu != task_cpu(p));
> > + WARN_ON_ONCE(task_current(rq, p));
> > + WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
> > + WARN_ON_ONCE(!task_on_rq_queued(p));
> > +
>
> Isn't it better to print it every time? It could be a different process
> each time, no?
This should never happen, so warning once seems enough, and it avoids
flooding the log.
>
> > + /*
> > + * Remove task from the pushable list as we try only once after that
> > + * the task has been put back in enqueued list.
> > + */
> > + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> > +
> > + return p;
> > +}
> > +
> > +/*
> > + * See if the non running fair tasks on this rq can be sent on other CPUs
> > + * that fits better with their profile.
> > + */
> > +static bool push_fair_task(struct rq *rq)
> > +{
> > + struct task_struct *next_task;
> > + int prev_cpu, new_cpu;
> > + struct rq *new_rq;
> > +
> > + next_task = pick_next_pushable_fair_task(rq);
> > + if (!next_task)
> > + return false;
> > +
> > + if (is_migration_disabled(next_task))
> > + return true;
> > +
> > + /* We might release rq lock */
> > + get_task_struct(next_task);
> > +
> > + prev_cpu = rq->cpu;
> > +
> > + new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
> > +
> > + if (new_cpu == prev_cpu)
> > + goto out;
> > +
> > + new_rq = cpu_rq(new_cpu);
> > +
> > + if (double_lock_balance(rq, new_rq)) {
> > + /* The task has already migrated in between */
> > + if (task_cpu(next_task) != rq->cpu) {
> > + double_unlock_balance(rq, new_rq);
> > + goto out;
> > + }
> > +
> > + deactivate_task(rq, next_task, 0);
> > + set_task_cpu(next_task, new_cpu);
> > + activate_task(new_rq, next_task, 0);
> > +
> > + resched_curr(new_rq);
> > +
> > + double_unlock_balance(rq, new_rq);
> > + }
> > +
> > +out:
> > + put_task_struct(next_task);
> > +
> > + return true;
> > +}
> > +
> > +static void push_fair_tasks(struct rq *rq)
> > +{
> > + /* push_fair_task() will return true if it moved a fair task */
> > + while (push_fair_task(rq))
> > + ;
> > +}
> > +
> > +static DEFINE_PER_CPU(struct balance_callback, fair_push_head);
> > +
> > +static inline void fair_queue_pushable_tasks(struct rq *rq)
> > +{
> > + if (!sched_energy_enabled() || !has_pushable_tasks(rq))
> > + return;
>
> has_pushable_tasks() can only return true if sched_energy_enabled() is set,
> so this check may not be needed. But it shouldn't hurt, since it is a
> static key.
I didn't want to add a useless call to has_pushable_tasks(), even if it
should be cheap.
>
> > +
> > + queue_balance_callback(rq, &per_cpu(fair_push_head, rq->cpu), push_fair_tasks);
> > +}
> > +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p)
> > +{
> > + if (sched_energy_enabled())
> > + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> > +}
> > +
> > +static void fair_add_pushable_task(struct rq *rq, struct task_struct *p)
> > +{
> > + if (sched_energy_enabled() && task_on_rq_queued(p) && !p->se.sched_delayed) {
> > + if (sched_energy_push_task(p, rq)) {
> > + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> > + plist_node_init(&p->pushable_tasks, p->prio);
> > + plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> > + }
> > + }
> > +}
> > +
> > /*
> > * select_task_rq_fair: Select target runqueue for the waking task in domains
> > * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
> > @@ -8758,6 +8952,10 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> > return sched_balance_newidle(rq, rf) != 0;
> > }
> > #else
> > +static inline void check_pushable_task(struct task_struct *p, struct rq *rq) {}
> > +static inline void fair_queue_pushable_tasks(struct rq *rq) {}
> > +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p) {}
> > +static inline void fair_add_pushable_task(struct rq *rq, struct task_struct *p) {}
> > static inline void set_task_max_allowed_capacity(struct task_struct *p) {}
> > #endif /* CONFIG_SMP */
> >
> > @@ -8947,6 +9145,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
> > put_prev_entity(cfs_rq, pse);
> > set_next_entity(cfs_rq, se);
> >
> > + /*
> > + * The previous task might be eligible for being pushed on
> > + * another cpu if it is still active.
> > + */
> > + fair_add_pushable_task(rq, prev);
> > +
> > __set_next_task_fair(rq, p, true);
> > }
> >
> > @@ -9019,6 +9223,13 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct t
> > cfs_rq = cfs_rq_of(se);
> > put_prev_entity(cfs_rq, se);
> > }
> > +
> > + /*
> > + * The previous task might be eligible for being pushed on another cpu
> > + * if it is still active.
> > + */
> > + fair_add_pushable_task(rq, prev);
> > +
> > }
> >
> > /*
> > @@ -13151,6 +13362,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
> > if (static_branch_unlikely(&sched_numa_balancing))
> > task_tick_numa(rq, curr);
> >
> > + check_pushable_task(curr, rq);
> > update_misfit_status(curr, rq);
> > check_update_overutilized_status(task_rq(curr));
> >
> > @@ -13303,6 +13515,8 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
> > {
> > struct sched_entity *se = &p->se;
> >
> > + fair_remove_pushable_task(rq, p);
> > +
> > #ifdef CONFIG_SMP
> > if (task_on_rq_queued(p)) {
> > /*
> > @@ -13320,6 +13534,11 @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs
> > if (hrtick_enabled_fair(rq))
> > hrtick_start_fair(rq, p);
> >
> > + /*
> > + * Try to push prev task before checking misfit for next task as
> > + * the migration of prev can make next fitting the CPU
> > + */
> > + fair_queue_pushable_tasks(rq);
> > update_misfit_status(p, rq);
> > sched_fair_update_stop_tick(rq, p);
> > }
> > @@ -13350,6 +13569,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
> > cfs_rq->tasks_timeline = RB_ROOT_CACHED;
> > cfs_rq->min_vruntime = (u64)(-(1LL << 20));
> > #ifdef CONFIG_SMP
> > + plist_head_init(&cfs_rq->pushable_tasks);
> > raw_spin_lock_init(&cfs_rq->removed.lock);
> > #endif
> > }
> > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> > index ab16d3d0e51c..2db198dccf21 100644
> > --- a/kernel/sched/sched.h
> > +++ b/kernel/sched/sched.h
> > @@ -722,6 +722,8 @@ struct cfs_rq {
> > struct list_head leaf_cfs_rq_list;
> > struct task_group *tg; /* group that "owns" this runqueue */
> >
> > + struct plist_head pushable_tasks;
> > +
> > /* Locally cached copy of our task_group's idle value */
> > int idle;
> >
>
Hi Vincent,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/sched/core]
[also build test ERROR on peterz-queue/sched/core linus/master v6.14-rc6 next-20250307]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Vincent-Guittot/sched-fair-Filter-false-overloaded_group-case-for-EAS/20250303-050850
base: tip/sched/core
patch link: https://lore.kernel.org/r/20250302210539.1563190-6-vincent.guittot%40linaro.org
patch subject: [PATCH 5/7 v5] sched/fair: Add push task mechanism for EAS
config: arm-realview_defconfig (https://download.01.org/0day-ci/archive/20250310/202503102022.MhverD5b-lkp@intel.com/config)
compiler: clang version 19.1.7 (https://github.com/llvm/llvm-project cd708029e0b2869e80abe31ddb175f7c35361f90)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250310/202503102022.MhverD5b-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202503102022.MhverD5b-lkp@intel.com/
All errors (new ones prefixed by >>):
>> kernel/sched/fair.c:8675:36: error: no member named 'pushable_tasks' in 'struct cfs_rq'
8675 | return !plist_head_empty(&rq->cfs.pushable_tasks);
| ~~~~~~~ ^
kernel/sched/fair.c:8685:33: error: no member named 'pushable_tasks' in 'struct cfs_rq'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ~~~~~~~ ^
include/linux/plist.h:233:27: note: expanded from macro 'plist_first_entry'
233 | container_of(plist_first(head), type, member)
| ^~~~
include/linux/container_of.h:19:26: note: expanded from macro 'container_of'
19 | void *__mptr = (void *)(ptr); \
| ^~~
kernel/sched/fair.c:8685:33: error: no member named 'pushable_tasks' in 'struct cfs_rq'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ~~~~~~~ ^
include/linux/plist.h:233:27: note: expanded from macro 'plist_first_entry'
233 | container_of(plist_first(head), type, member)
| ^~~~
include/linux/container_of.h:20:30: note: expanded from macro 'container_of'
20 | static_assert(__same_type(*(ptr), ((type *)0)->member) || \
| ^~~
include/linux/compiler_types.h:483:63: note: expanded from macro '__same_type'
483 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
| ^
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ^~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
kernel/sched/fair.c:8685:33: error: no member named 'pushable_tasks' in 'struct cfs_rq'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ~~~~~~~ ^
include/linux/plist.h:233:27: note: expanded from macro 'plist_first_entry'
233 | container_of(plist_first(head), type, member)
| ^~~~
include/linux/container_of.h:21:23: note: expanded from macro 'container_of'
21 | __same_type(*(ptr), void), \
| ^~~
include/linux/compiler_types.h:483:63: note: expanded from macro '__same_type'
483 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
| ^
include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
| ^~~~
include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
kernel/sched/fair.c:8697:41: error: no member named 'pushable_tasks' in 'struct cfs_rq'
8697 | plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
| ~~~~~~~ ^
kernel/sched/fair.c:8772:42: error: no member named 'pushable_tasks' in 'struct cfs_rq'
8772 | plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
| ~~~~~~~ ^
kernel/sched/fair.c:8779:43: error: no member named 'pushable_tasks' in 'struct cfs_rq'
8779 | plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
| ~~~~~~~ ^
kernel/sched/fair.c:8781:43: error: no member named 'pushable_tasks' in 'struct cfs_rq'
8781 | plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);
| ~~~~~~~ ^
kernel/sched/fair.c:13572:27: error: no member named 'pushable_tasks' in 'struct cfs_rq'
13572 | plist_head_init(&cfs_rq->pushable_tasks);
| ~~~~~~ ^
9 errors generated.
vim +8675 kernel/sched/fair.c
8672
8673 static inline int has_pushable_tasks(struct rq *rq)
8674 {
> 8675 return !plist_head_empty(&rq->cfs.pushable_tasks);
8676 }
8677
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Vincent,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/sched/core]
[also build test ERROR on peterz-queue/sched/core linus/master v6.14-rc5 next-20250306]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Vincent-Guittot/sched-fair-Filter-false-overloaded_group-case-for-EAS/20250303-050850
base: tip/sched/core
patch link: https://lore.kernel.org/r/20250302210539.1563190-6-vincent.guittot%40linaro.org
patch subject: [PATCH 5/7 v5] sched/fair: Add push task mechanism for EAS
config: x86_64-randconfig-003-20250307 (https://download.01.org/0day-ci/archive/20250307/202503072035.8PEXiAFe-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250307/202503072035.8PEXiAFe-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202503072035.8PEXiAFe-lkp@intel.com/
All errors (new ones prefixed by >>):
kernel/sched/fair.c: In function 'has_pushable_tasks':
kernel/sched/fair.c:8675:42: error: 'struct cfs_rq' has no member named 'pushable_tasks'
8675 | return !plist_head_empty(&rq->cfs.pushable_tasks);
| ^
In file included from include/linux/kernel.h:22,
from include/linux/cpumask.h:11,
from include/linux/energy_model.h:4,
from kernel/sched/fair.c:23:
kernel/sched/fair.c: In function 'pick_next_pushable_fair_task':
kernel/sched/fair.c:8685:39: error: 'struct cfs_rq' has no member named 'pushable_tasks'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ^
include/linux/container_of.h:19:33: note: in definition of macro 'container_of'
19 | void *__mptr = (void *)(ptr); \
| ^~~
kernel/sched/fair.c:8685:13: note: in expansion of macro 'plist_first_entry'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ^~~~~~~~~~~~~~~~~
In file included from include/linux/container_of.h:5,
from include/linux/kernel.h:22,
from include/linux/cpumask.h:11,
from include/linux/energy_model.h:4,
from kernel/sched/fair.c:23:
kernel/sched/fair.c:8685:39: error: 'struct cfs_rq' has no member named 'pushable_tasks'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ^
include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
20 | static_assert(__same_type(*(ptr), ((type *)0)->member) || \
| ^~~~~~~~~~~~~
include/linux/container_of.h:20:23: note: in expansion of macro '__same_type'
20 | static_assert(__same_type(*(ptr), ((type *)0)->member) || \
| ^~~~~~~~~~~
include/linux/plist.h:233:9: note: in expansion of macro 'container_of'
233 | container_of(plist_first(head), type, member)
| ^~~~~~~~~~~~
kernel/sched/fair.c:8685:13: note: in expansion of macro 'plist_first_entry'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ^~~~~~~~~~~~~~~~~
kernel/sched/fair.c:8685:39: error: 'struct cfs_rq' has no member named 'pushable_tasks'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ^
include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
20 | static_assert(__same_type(*(ptr), ((type *)0)->member) || \
| ^~~~~~~~~~~~~
include/linux/container_of.h:21:23: note: in expansion of macro '__same_type'
21 | __same_type(*(ptr), void), \
| ^~~~~~~~~~~
include/linux/plist.h:233:9: note: in expansion of macro 'container_of'
233 | container_of(plist_first(head), type, member)
| ^~~~~~~~~~~~
kernel/sched/fair.c:8685:13: note: in expansion of macro 'plist_first_entry'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ^~~~~~~~~~~~~~~~~
>> include/linux/compiler_types.h:483:27: error: expression in static assertion is not an integer
483 | #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:78:56: note: in definition of macro '__static_assert'
78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
| ^~~~
include/linux/container_of.h:20:9: note: in expansion of macro 'static_assert'
20 | static_assert(__same_type(*(ptr), ((type *)0)->member) || \
| ^~~~~~~~~~~~~
include/linux/container_of.h:20:23: note: in expansion of macro '__same_type'
20 | static_assert(__same_type(*(ptr), ((type *)0)->member) || \
| ^~~~~~~~~~~
include/linux/plist.h:233:9: note: in expansion of macro 'container_of'
233 | container_of(plist_first(head), type, member)
| ^~~~~~~~~~~~
kernel/sched/fair.c:8685:13: note: in expansion of macro 'plist_first_entry'
8685 | p = plist_first_entry(&rq->cfs.pushable_tasks,
| ^~~~~~~~~~~~~~~~~
kernel/sched/fair.c:8697:47: error: 'struct cfs_rq' has no member named 'pushable_tasks'
8697 | plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
| ^
kernel/sched/fair.c: In function 'fair_remove_pushable_task':
kernel/sched/fair.c:8772:55: error: 'struct cfs_rq' has no member named 'pushable_tasks'
8772 | plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
| ^
kernel/sched/fair.c: In function 'fair_add_pushable_task':
kernel/sched/fair.c:8779:63: error: 'struct cfs_rq' has no member named 'pushable_tasks'
8779 | plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
| ^
kernel/sched/fair.c:8781:63: error: 'struct cfs_rq' has no member named 'pushable_tasks'
8781 | plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);
| ^
kernel/sched/fair.c: In function 'init_cfs_rq':
kernel/sched/fair.c:13572:32: error: 'struct cfs_rq' has no member named 'pushable_tasks'
13572 | plist_head_init(&cfs_rq->pushable_tasks);
| ^~
kernel/sched/fair.c: In function 'has_pushable_tasks':
kernel/sched/fair.c:8676:1: warning: control reaches end of non-void function [-Wreturn-type]
8676 | }
| ^
vim +483 include/linux/compiler_types.h
eb111869301e15 Rasmus Villemoes 2019-09-13 481
d15155824c5014 Will Deacon 2017-10-24 482 /* Are two types/vars the same type (ignoring qualifiers)? */
d15155824c5014 Will Deacon 2017-10-24 @483 #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
d15155824c5014 Will Deacon 2017-10-24 484
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
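A note on the two reports above: both configs fail with "no member named
'pushable_tasks' in 'struct cfs_rq'" even though the code using the field is
built, and the sched.h hunk context (leaf_cfs_rq_list, *tg, the group idle
field) appears to sit in a part of struct cfs_rq that is only compiled with
CONFIG_FAIR_GROUP_SCHED, which would explain the errors on configs without
it. A minimal sketch of a placement that matches the users of the field,
offered as an assumption rather than a confirmed fix:

	/* in struct cfs_rq, next to the other SMP-only fields */
	#ifdef CONFIG_SMP
		/* tasks that may be pushed to another CPU (assumed placement) */
		struct plist_head	pushable_tasks;
	#endif /* CONFIG_SMP */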