[RFC PATCH 5/5] sched/fair: Add push task callback for EAS

Posted by Vincent Guittot 1 year, 3 months ago
EAS is based on wakeup events to efficiently place tasks on the system, but
there are cases where a task will not have wakeup events anymore, or at far
too low a pace. In such situations, we can take advantage of the task being
put back in the enqueued list to check whether it should be migrated to
another CPU. When the task is the only one running on the CPU, the tick
will check whether the task is stuck on this CPU and should migrate to
another one.

Wakeup events remain the main way to migrate tasks, but we now detect
situations where a task is stuck on a CPU by checking that its utilization
is larger than the max available compute capacity (max CPU capacity or
uclamp max setting).

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/fair.c  | 211 +++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |   2 +
 2 files changed, 213 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e46af2416159..41fb18ac118b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5455,6 +5455,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+static void dequeue_pushable_task(struct cfs_rq *cfs_rq, struct sched_entity *se, bool queue);
 
 static bool
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -5463,6 +5464,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	update_curr(cfs_rq);
 
+	dequeue_pushable_task(cfs_rq, se, false);
+
 	if (flags & DEQUEUE_DELAYED) {
 		SCHED_WARN_ON(!se->sched_delayed);
 	} else {
@@ -5585,6 +5588,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	}
 
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
+
+	dequeue_pushable_task(cfs_rq, se, true);
 }
 
 static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags);
@@ -5620,6 +5625,7 @@ pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
 }
 
 static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+static void enqueue_pushable_task(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
 static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 {
@@ -5639,9 +5645,16 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */
 		update_load_avg(cfs_rq, prev, 0);
+
+		/*
+		 * The previous task might be eligible to be pushed to
+		 * another CPU if it is still active.
+		 */
+		enqueue_pushable_task(cfs_rq, prev);
 	}
 	SCHED_WARN_ON(cfs_rq->curr != prev);
 	cfs_rq->curr = NULL;
+
 }
 
 static void
@@ -8393,6 +8406,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 			target_stat.runnable = cpu_runnable(cpu_rq(cpu));
 			target_stat.capa = capacity_of(cpu);
 			target_stat.nr_running = cpu_rq(cpu)->cfs.h_nr_running;
+			if ((p->on_rq) && (cpu == prev_cpu))
+				target_stat.nr_running--;
 
 			/* If the target needs a lower OPP, then look up for
 			 * the corresponding OPP and its associated cost.
@@ -8473,6 +8488,197 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	return target;
 }
 
+static inline bool task_misfit_cpu(struct task_struct *p, int cpu)
+{
+	unsigned long max_capa = get_actual_cpu_capacity(cpu);
+	unsigned long util = task_util_est(p);
+
+	max_capa = min(max_capa, uclamp_eff_value(p, UCLAMP_MAX));
+	util = max(util, task_runnable(p));
+
+	/*
+	 * Return true only if the task might not sleep/wake up because of a
+	 * low compute capacity. Tasks which wake up regularly will be
+	 * handled by feec().
+	 */
+	return (util > max_capa);
+}
+
+static int active_load_balance_cpu_stop(void *data);
+
+static inline void check_misfit_cpu(struct task_struct *p, struct rq *rq)
+{
+	int new_cpu, cpu = cpu_of(rq);
+
+	if (!sched_energy_enabled())
+		return;
+
+	if (WARN_ON(!p))
+		return;
+
+	if (WARN_ON(p != rq->curr))
+		return;
+
+	if (is_migration_disabled(p))
+		return;
+
+	if ((rq->nr_running > 1) || (p->nr_cpus_allowed == 1))
+		return;
+
+	if (!task_misfit_cpu(p, cpu))
+		return;
+
+	new_cpu = find_energy_efficient_cpu(p, cpu);
+
+	if (new_cpu == cpu)
+		return;
+
+	/*
+	 * ->active_balance synchronizes accesses to
+	 * ->active_balance_work.  Once set, it's cleared
+	 * only after active load balance is finished.
+	 */
+	if (!rq->active_balance) {
+		rq->active_balance = 1;
+		rq->push_cpu = new_cpu;
+	} else
+		return;
+
+	raw_spin_rq_unlock(rq);
+	stop_one_cpu_nowait(cpu,
+		active_load_balance_cpu_stop, rq,
+		&rq->active_balance_work);
+	raw_spin_rq_lock(rq);
+}
+
+static inline int has_pushable_tasks(struct rq *rq)
+{
+	return !plist_head_empty(&rq->cfs.pushable_tasks);
+}
+
+static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
+{
+	struct task_struct *p;
+
+	if (!has_pushable_tasks(rq))
+		return NULL;
+
+	p = plist_first_entry(&rq->cfs.pushable_tasks,
+			      struct task_struct, pushable_tasks);
+
+	WARN_ON_ONCE(rq->cpu != task_cpu(p));
+	WARN_ON_ONCE(task_current(rq, p));
+	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
+
+	WARN_ON_ONCE(!task_on_rq_queued(p));
+
+	/*
+	 * Remove task from the pushable list as we try only once after
+	 * task has been put back in enqueued list.
+	 */
+	plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+
+	return p;
+}
+
+/*
+ * See if the non running fair tasks on this rq
+ * can be sent to some other CPU that fits better with
+ * their profile.
+ */
+static int push_fair_task(struct rq *rq)
+{
+	struct task_struct *next_task;
+	struct rq *new_rq;
+	int prev_cpu, new_cpu;
+	int ret = 0;
+
+	next_task = pick_next_pushable_fair_task(rq);
+	if (!next_task)
+		return 0;
+
+	if (is_migration_disabled(next_task))
+		return 0;
+
+	if (WARN_ON(next_task == rq->curr))
+		return 0;
+
+	/* We might release rq lock */
+	get_task_struct(next_task);
+
+	prev_cpu = rq->cpu;
+
+	new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
+
+	if (new_cpu == prev_cpu)
+		goto out;
+
+	new_rq = cpu_rq(new_cpu);
+
+	if (double_lock_balance(rq, new_rq)) {
+
+		deactivate_task(rq, next_task, 0);
+		set_task_cpu(next_task, new_cpu);
+		activate_task(new_rq, next_task, 0);
+		ret = 1;
+
+		resched_curr(new_rq);
+
+		double_unlock_balance(rq, new_rq);
+	}
+
+out:
+	put_task_struct(next_task);
+
+	return ret;
+}
+
+static void push_fair_tasks(struct rq *rq)
+{
+	/* push_fair_task() will return true if it moved a fair task */
+	while (push_fair_task(rq))
+		;
+}
+
+static DEFINE_PER_CPU(struct balance_callback, fair_push_head);
+
+static inline void fair_queue_push_tasks(struct rq *rq)
+{
+	if (!sched_energy_enabled() || !has_pushable_tasks(rq))
+		return;
+
+	queue_balance_callback(rq, &per_cpu(fair_push_head, rq->cpu), push_fair_tasks);
+}
+static void dequeue_pushable_task(struct cfs_rq *cfs_rq, struct sched_entity *se, bool queue)
+{
+	struct task_struct *p;
+	struct rq *rq;
+
+	if (sched_energy_enabled() && entity_is_task(se)) {
+		rq = rq_of(cfs_rq);
+		p = container_of(se, struct task_struct, se);
+
+		plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+
+		if (queue)
+			fair_queue_push_tasks(rq);
+	}
+}
+
+static void enqueue_pushable_task(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (sched_energy_enabled() && entity_is_task(se)) {
+		struct task_struct *p = container_of(se, struct task_struct, se);
+		struct rq *rq = rq_of(cfs_rq);
+
+		if ((p->nr_cpus_allowed > 1) && task_misfit_cpu(p, rq->cpu)) {
+			plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+			plist_node_init(&p->pushable_tasks, p->prio);
+			plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);
+		}
+	}
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE,
@@ -8642,6 +8848,8 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	return sched_balance_newidle(rq, rf) != 0;
 }
 #else
+static inline void dequeue_pushable_task(struct cfs_rq *cfs_rq, struct sched_entity *se, bool queue) {}
+static inline void enqueue_pushable_task(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void set_task_max_allowed_capacity(struct task_struct *p) {}
 #endif /* CONFIG_SMP */
 
@@ -13013,6 +13221,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 	check_update_overutilized_status(task_rq(curr));
 
 	task_tick_core(rq, curr);
+
+	check_misfit_cpu(curr, rq);
 }
 
 /*
@@ -13204,6 +13414,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
 void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT_CACHED;
+	plist_head_init(&cfs_rq->pushable_tasks);
 	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 #ifdef CONFIG_SMP
 	raw_spin_lock_init(&cfs_rq->removed.lock);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2f5d658c0631..f3327695d4a3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -672,6 +672,8 @@ struct cfs_rq {
 	struct list_head	leaf_cfs_rq_list;
 	struct task_group	*tg;	/* group that "owns" this runqueue */
 
+	struct plist_head	pushable_tasks;
+
 	/* Locally cached copy of our task_group's idle value */
 	int			idle;
 
-- 
2.34.1
Re: [RFC PATCH 5/5] sched/fair: Add push task callback for EAS
Posted by Pierre Gondois 1 year, 3 months ago
Hello Vincent,

On 8/30/24 15:03, Vincent Guittot wrote:
> EAS is based on wakeup events to efficiently place tasks on the system, but
> there are cases where a task will not have wakeup events anymore, or at far
> too low a pace. In such situations, we can take advantage of the task being
> put back in the enqueued list to check whether it should be migrated to
> another CPU. When the task is the only one running on the CPU, the tick
> will check whether the task is stuck on this CPU and should migrate to
> another one.
>
> Wakeup events remain the main way to migrate tasks, but we now detect
> situations where a task is stuck on a CPU by checking that its utilization
> is larger than the max available compute capacity (max CPU capacity or
> uclamp max setting).
> 
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
>   kernel/sched/fair.c  | 211 +++++++++++++++++++++++++++++++++++++++++++
>   kernel/sched/sched.h |   2 +
>   2 files changed, 213 insertions(+)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e46af2416159..41fb18ac118b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c

[...]

> +
> +static inline void check_misfit_cpu(struct task_struct *p, struct rq *rq)
> +{
> +	int new_cpu, cpu = cpu_of(rq);
> +
> +	if (!sched_energy_enabled())
> +		return;
> +
> +	if (WARN_ON(!p))
> +		return;
> +
> +	if (WARN_ON(p != rq->curr))
> +		return;
> +
> +	if (is_migration_disabled(p))
> +		return;
> +
> +	if ((rq->nr_running > 1) || (p->nr_cpus_allowed == 1))

If the goal is to detect tasks that should be migrated to bigger CPUs,
couldn't the check be changed from:
-  (p->nr_cpus_allowed == 1)
to
- (p->max_allowed_capacity == arch_scale_cpu_capacity(cpu))
to avoid, for instance, the case where a task is bound to the little cluster?

Similar question for update_misfit_status(), doesn't:
- (arch_scale_cpu_capacity(cpu) == p->max_allowed_capacity)
include this case:
- (p->nr_cpus_allowed == 1)
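
In context, that hypothetical variant would read (untested sketch, not
part of the posted patch):

	if ((rq->nr_running > 1) ||
	    (p->max_allowed_capacity == arch_scale_cpu_capacity(cpu)))
		return;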


> +		return;
> +
> +	if (!task_misfit_cpu(p, cpu))
> +		return;

task_misfit_cpu() intends to check whether the task will have an opportunity
to run feec() through wakeups/push-pull.

Shouldn't we check whether the task fits the CPU with the 20% margin,
i.e. with task_fits_cpu(), as well? This would allow migrating the task
faster than the load balancer.
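
For instance, something like this in check_misfit_cpu() (untested sketch,
assuming task_fits_cpu() keeps its current semantics):

	/* Bail out only if the task is neither stuck nor misfit: */
	if (!task_misfit_cpu(p, cpu) && task_fits_cpu(p, cpu))
		return;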


> +
> +	new_cpu = find_energy_efficient_cpu(p, cpu);
> +
> +	if (new_cpu == cpu)
> +		return;
> +
> +	/*
> +	 * ->active_balance synchronizes accesses to
> +	 * ->active_balance_work.  Once set, it's cleared
> +	 * only after active load balance is finished.
> +	 */
> +	if (!rq->active_balance) {
> +		rq->active_balance = 1;
> +		rq->push_cpu = new_cpu;
> +	} else
> +		return;
> +
> +	raw_spin_rq_unlock(rq);
> +	stop_one_cpu_nowait(cpu,
> +		active_load_balance_cpu_stop, rq,
> +		&rq->active_balance_work);
> +	raw_spin_rq_lock(rq);
> +}
> +

Regards,
Pierre
Re: [RFC PATCH 5/5] sched/fair: Add push task callback for EAS
Posted by Vincent Guittot 1 year, 2 months ago
On Fri, 13 Sept 2024 at 18:08, Pierre Gondois <pierre.gondois@arm.com> wrote:
>
> Hello Vincent,
>
> On 8/30/24 15:03, Vincent Guittot wrote:
> > EAS is based on wakeup events to efficiently place tasks on the system, but
> > there are cases where a task will not have wakeup events anymore, or at far
> > too low a pace. In such situations, we can take advantage of the task being
> > put back in the enqueued list to check whether it should be migrated to
> > another CPU. When the task is the only one running on the CPU, the tick
> > will check whether the task is stuck on this CPU and should migrate to
> > another one.
> >
> > Wakeup events remain the main way to migrate tasks, but we now detect
> > situations where a task is stuck on a CPU by checking that its utilization
> > is larger than the max available compute capacity (max CPU capacity or
> > uclamp max setting).
> >
> > Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> > ---
> >   kernel/sched/fair.c  | 211 +++++++++++++++++++++++++++++++++++++++++++
> >   kernel/sched/sched.h |   2 +
> >   2 files changed, 213 insertions(+)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index e46af2416159..41fb18ac118b 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
>
> [...]
>
> > +
> > +static inline void check_misfit_cpu(struct task_struct *p, struct rq *rq)
> > +{
> > +     int new_cpu, cpu = cpu_of(rq);
> > +
> > +     if (!sched_energy_enabled())
> > +             return;
> > +
> > +     if (WARN_ON(!p))
> > +             return;
> > +
> > +     if (WARN_ON(p != rq->curr))
> > +             return;
> > +
> > +     if (is_migration_disabled(p))
> > +             return;
> > +
> > +     if ((rq->nr_running > 1) || (p->nr_cpus_allowed == 1))
>
> If the goal is to detect tasks that should be migrated to bigger CPUs,

We don't want the tick to try an active migration of the running
task if the task can't run on any CPU other than this one. This is not
related to the migration to bigger CPUs that is done by
update_misfit_status().

> couldn't the check be changed from:
> -  (p->nr_cpus_allowed == 1)
> to
> - (p->max_allowed_capacity == arch_scale_cpu_capacity(cpu))
> to avoid the case where a task is bound to the little cluster for instance ?

I was about to say yes, but the condition
(arch_scale_cpu_capacity(cpu) == p->max_allowed_capacity) is too broad
and prevents migrating to a smaller CPU, which is one case that we want
to handle here.  That being said, I have an internal patch that includes
the check done by update_misfit_status() in the push callback mechanism,
but I didn't add it in this version to avoid too much change in the
same series.

>
> Similar question for update_misfit_status(), doesn't:
> - (arch_scale_cpu_capacity(cpu) == p->max_allowed_capacity)
> include this case:
> - (p->nr_cpus_allowed == 1)

For update_misfit_status(), you are right.

>
>
> > +             return;
> > +
> > +     if (!task_misfit_cpu(p, cpu))
> > +             return;
>
> task_misfit_cpu() intends to check whether the task will have an opportunity
> to run feec() through wakeups/push-pull.
>
> Shouldn't we check whether the task fits the CPU with the 20% margin,
> i.e. with task_fits_cpu(), as well? This would allow migrating the task
> faster than the load balancer.

As mentioned above, I have a patch that I didn't send that adds the
update_misfit_status() condition to the push call. I agree that it should
speed up some migrations of misfit tasks to a bigger CPU without waking up
an idle CPU to do the load balance and pull the task onto a potential
third CPU.
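
A rough guess at the shape of such a change in enqueue_pushable_task(),
reusing task_fits_cpu() as update_misfit_status() does (hypothetical
sketch, not the actual unsent patch):

	if ((p->nr_cpus_allowed > 1) &&
	    (task_misfit_cpu(p, rq->cpu) || !task_fits_cpu(p, rq->cpu))) {
		plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
		plist_node_init(&p->pushable_tasks, p->prio);
		plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks);
	}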

>
>
> > +
> > +     new_cpu = find_energy_efficient_cpu(p, cpu);
> > +
> > +     if (new_cpu == cpu)
> > +             return;
> > +
> > +     /*
> > +      * ->active_balance synchronizes accesses to
> > +      * ->active_balance_work.  Once set, it's cleared
> > +      * only after active load balance is finished.
> > +      */
> > +     if (!rq->active_balance) {
> > +             rq->active_balance = 1;
> > +             rq->push_cpu = new_cpu;
> > +     } else
> > +             return;
> > +
> > +     raw_spin_rq_unlock(rq);
> > +     stop_one_cpu_nowait(cpu,
> > +             active_load_balance_cpu_stop, rq,
> > +             &rq->active_balance_work);
> > +     raw_spin_rq_lock(rq);
> > +}
> > +
>
> Regards,
> Pierre
Re: [RFC PATCH 5/5] sched/fair: Add push task callback for EAS
Posted by Pierre Gondois 1 year, 3 months ago
Hello Vincent,

On 8/30/24 15:03, Vincent Guittot wrote:
> EAS is based on wakeup events to efficiently place tasks on the system, but
> there are cases where a task will not have wakeup events anymore, or at far
> too low a pace. In such situations, we can take advantage of the task being
> put back in the enqueued list to check whether it should be migrated to
> another CPU. When the task is the only one running on the CPU, the tick
> will check whether the task is stuck on this CPU and should migrate to
> another one.
>
> Wakeup events remain the main way to migrate tasks, but we now detect
> situations where a task is stuck on a CPU by checking that its utilization
> is larger than the max available compute capacity (max CPU capacity or
> uclamp max setting).
> 
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
>   kernel/sched/fair.c  | 211 +++++++++++++++++++++++++++++++++++++++++++
>   kernel/sched/sched.h |   2 +
>   2 files changed, 213 insertions(+)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e46af2416159..41fb18ac118b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c

[snip]

> +
> +static inline void check_misfit_cpu(struct task_struct *p, struct rq *rq)
> +{
> +	int new_cpu, cpu = cpu_of(rq);
> +
> +	if (!sched_energy_enabled())
> +		return;
> +
> +	if (WARN_ON(!p))
> +		return;
> +
> +	if (WARN_ON(p != rq->curr))
> +		return;
> +
> +	if (is_migration_disabled(p))
> +		return;
> +
> +	if ((rq->nr_running > 1) || (p->nr_cpus_allowed == 1))
> +		return;

I tried the code on a Pixel6 with the following setup:
- without the above (rq->nr_running > 1) condition
- without the push task mechanism
i.e. tasks without regular wakeups only have the opportunity to
run feec() via the sched_tick. It seemed sufficient to avoid
the problems you mentioned:
- having unbalanced UCLAMP_MAX tasks in a pd, e.g. 1 UCLAMP_MAX task
   per little CPU, except one little CPU with N UCLAMP_MAX tasks
- downgrading UCLAMP_MAX tasks that could run on smaller CPUs
   but have no wakeups and thus don't run feec()

Thus I was wondering if it would not be better to integrate
EAS into the load balancer instead (not my idea, but I don't remember
who suggested it).
Or otherwise whether just running feec() through the sched_tick path
would not be sufficient (i.e. this patch minus the push mechanism).

> +
> +	if (!task_misfit_cpu(p, cpu))
> +		return;
> +
> +	new_cpu = find_energy_efficient_cpu(p, cpu);
> +
> +	if (new_cpu == cpu)
> +		return;
> +
> +	/*
> +	 * ->active_balance synchronizes accesses to
> +	 * ->active_balance_work.  Once set, it's cleared
> +	 * only after active load balance is finished.
> +	 */
> +	if (!rq->active_balance) {
> +		rq->active_balance = 1;
> +		rq->push_cpu = new_cpu;
> +	} else
> +		return;
> +
> +	raw_spin_rq_unlock(rq);
> +	stop_one_cpu_nowait(cpu,
> +		active_load_balance_cpu_stop, rq,
> +		&rq->active_balance_work);
> +	raw_spin_rq_lock(rq);

I didn't hit any error, but isn't this susceptible to the following?
   commit f0498d2a54e7 ("sched: Fix stop_one_cpu_nowait() vs hotplug")


> +}
> +
> +static inline int has_pushable_tasks(struct rq *rq)
> +{
> +	return !plist_head_empty(&rq->cfs.pushable_tasks);
> +}
> +
> +static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
> +{
> +	struct task_struct *p;
> +
> +	if (!has_pushable_tasks(rq))
> +		return NULL;
> +
> +	p = plist_first_entry(&rq->cfs.pushable_tasks,
> +			      struct task_struct, pushable_tasks);
> +
> +	WARN_ON_ONCE(rq->cpu != task_cpu(p));
> +	WARN_ON_ONCE(task_current(rq, p));
> +	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
> +
> +	WARN_ON_ONCE(!task_on_rq_queued(p));
> +
> +	/*
> +	 * Remove task from the pushable list as we try only once after
> +	 * task has been put back in enqueued list.
> +	 */
> +	plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> +
> +	return p;
> +}
> +
> +/*
> + * See if the non running fair tasks on this rq
> + * can be sent to some other CPU that fits better with
> + * their profile.
> + */
> +static int push_fair_task(struct rq *rq)
> +{
> +	struct task_struct *next_task;
> +	struct rq *new_rq;
> +	int prev_cpu, new_cpu;
> +	int ret = 0;
> +
> +	next_task = pick_next_pushable_fair_task(rq);
> +	if (!next_task)
> +		return 0;
> +
> +	if (is_migration_disabled(next_task))
> +		return 0;
> +
> +	if (WARN_ON(next_task == rq->curr))
> +		return 0;
> +
> +	/* We might release rq lock */
> +	get_task_struct(next_task);
> +
> +	prev_cpu = rq->cpu;
> +
> +	new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
> +
> +	if (new_cpu == prev_cpu)
> +		goto out;
> +
> +	new_rq = cpu_rq(new_cpu);
> +
> +	if (double_lock_balance(rq, new_rq)) {


I think it might be necessary to check the following:
   if (task_cpu(next_task) != rq->cpu) {
     double_unlock_balance(rq, new_rq);
     goto out;
   }

Indeed I've been hitting the following warnings:
- uclamp_rq_dec_id():SCHED_WARN_ON(!bucket->tasks)
- set_task_cpu()::WARN_ON_ONCE(state == TASK_RUNNING &&
		     p->sched_class == &fair_sched_class &&
		     (p->on_rq && !task_on_rq_migrating(p)))
- update_entity_lag()::SCHED_WARN_ON(!se->on_rq)

and it seemed to be caused by the task not being on the initial rq anymore.
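
With the suggested re-check folded in, the locked section of
push_fair_task() could look like this (hypothetical sketch):

	if (double_lock_balance(rq, new_rq)) {
		/*
		 * double_lock_balance() may have dropped the rq lock,
		 * so the task could have been migrated away meanwhile:
		 */
		if (task_cpu(next_task) != rq->cpu) {
			double_unlock_balance(rq, new_rq);
			goto out;
		}

		deactivate_task(rq, next_task, 0);
		set_task_cpu(next_task, new_cpu);
		activate_task(new_rq, next_task, 0);
		ret = 1;

		resched_curr(new_rq);

		double_unlock_balance(rq, new_rq);
	}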

> +
> +		deactivate_task(rq, next_task, 0);
> +		set_task_cpu(next_task, new_cpu);
> +		activate_task(new_rq, next_task, 0);
> +		ret = 1;
> +
> +		resched_curr(new_rq);
> +
> +		double_unlock_balance(rq, new_rq);
> +	}
> +
> +out:
> +	put_task_struct(next_task);
> +
> +	return ret;
> +}
> +

Regards,
Pierre
Re: [RFC PATCH 5/5] sched/fair: Add push task callback for EAS
Posted by Vincent Guittot 1 year, 3 months ago
Hello Pierre,

On Wed, 11 Sept 2024 at 16:03, Pierre Gondois <pierre.gondois@arm.com> wrote:
>
> Hello Vincent,
>
> On 8/30/24 15:03, Vincent Guittot wrote:
> > EAS is based on wakeup events to efficiently place tasks on the system, but
> > there are cases where a task will not have wakeup events anymore, or at far
> > too low a pace. In such situations, we can take advantage of the task being
> > put back in the enqueued list to check whether it should be migrated to
> > another CPU. When the task is the only one running on the CPU, the tick
> > will check whether the task is stuck on this CPU and should migrate to
> > another one.
> >
> > Wakeup events remain the main way to migrate tasks, but we now detect
> > situations where a task is stuck on a CPU by checking that its utilization
> > is larger than the max available compute capacity (max CPU capacity or
> > uclamp max setting).
> >
> > Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> > ---
> >   kernel/sched/fair.c  | 211 +++++++++++++++++++++++++++++++++++++++++++
> >   kernel/sched/sched.h |   2 +
> >   2 files changed, 213 insertions(+)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index e46af2416159..41fb18ac118b 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
>
> [snip]
>
> > +
> > +static inline void check_misfit_cpu(struct task_struct *p, struct rq *rq)
> > +{
> > +     int new_cpu, cpu = cpu_of(rq);
> > +
> > +     if (!sched_energy_enabled())
> > +             return;
> > +
> > +     if (WARN_ON(!p))
> > +             return;
> > +
> > +     if (WARN_ON(p != rq->curr))
> > +             return;
> > +
> > +     if (is_migration_disabled(p))
> > +             return;
> > +
> > +     if ((rq->nr_running > 1) || (p->nr_cpus_allowed == 1))
> > +             return;
>
> I tried the code on a Pixel6 with the following setup:
> - without the above (rq->nr_running > 1) condition
> - without the push task mechanism
> i.e. tasks without regular wakeups only have the opportunity to
> run feec() via the sched_tick. It seemed sufficient to avoid
> the problems you mentioned:
> - having unbalanced UCLAMP_MAX tasks in a pd, e.g. 1 UCLAMP_MAX task
>    per little CPU, except one little CPU with N UCLAMP_MAX tasks
> - downgrading UCLAMP_MAX tasks that could run on smaller CPUs
>    but have no wakeups and thus don't run feec()

The main problem with your test is that you always call feec() for the
running task, so you always have to wake up the migration thread to
migrate the currently running thread, which is quite inefficient. The
push mechanism only takes a task which is not the currently running one,
so we don't need to wake up the migration thread, which is simpler and
more efficient. We check only one task at a time and will not loop over
an unbounded number of tasks after a task switch or a tick.

>
> Thus I was wondering if it would not be better to integrate
> EAS into the load balancer instead (not my idea, but I don't remember
> who suggested it).

My 1st thought was also to use load balancing to pull tasks which were
stuck on the wrong CPU (as mentioned in [1]), but this solution is not
scalable as we don't want to test all runnable tasks on a CPU, and it's
not really easy to know which CPU and which tasks should be checked.

[1] https://youtu.be/PHEBAyxeM_M?si=ZApIOw3BS4SOLPwp

> Or otherwise whether just running feec() through the sched_tick path
> would not be sufficient (i.e. this patch minus the push mechanism).

As mentioned above, the push mechanism is more efficient than active migration.


>
> > +
> > +     if (!task_misfit_cpu(p, cpu))
> > +             return;
> > +
> > +     new_cpu = find_energy_efficient_cpu(p, cpu);
> > +
> > +     if (new_cpu == cpu)
> > +             return;
> > +
> > +     /*
> > +      * ->active_balance synchronizes accesses to
> > +      * ->active_balance_work.  Once set, it's cleared
> > +      * only after active load balance is finished.
> > +      */
> > +     if (!rq->active_balance) {
> > +             rq->active_balance = 1;
> > +             rq->push_cpu = new_cpu;
> > +     } else
> > +             return;
> > +
> > +     raw_spin_rq_unlock(rq);
> > +     stop_one_cpu_nowait(cpu,
> > +             active_load_balance_cpu_stop, rq,
> > +             &rq->active_balance_work);
> > +     raw_spin_rq_lock(rq);
>
> I didn't hit any error, but isn't this susceptible to the following?
>    commit f0498d2a54e7 ("sched: Fix stop_one_cpu_nowait() vs hotplug")
>

I will recheck, but being called from the tick, for the local CPU, and
with the running thread not being the cpu_stopper_thread, should protect
us from the case described in this commit.

>
> > +}
> > +
> > +static inline int has_pushable_tasks(struct rq *rq)
> > +{
> > +     return !plist_head_empty(&rq->cfs.pushable_tasks);
> > +}
> > +
> > +static struct task_struct *pick_next_pushable_fair_task(struct rq *rq)
> > +{
> > +     struct task_struct *p;
> > +
> > +     if (!has_pushable_tasks(rq))
> > +             return NULL;
> > +
> > +     p = plist_first_entry(&rq->cfs.pushable_tasks,
> > +                           struct task_struct, pushable_tasks);
> > +
> > +     WARN_ON_ONCE(rq->cpu != task_cpu(p));
> > +     WARN_ON_ONCE(task_current(rq, p));
> > +     WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
> > +
> > +     WARN_ON_ONCE(!task_on_rq_queued(p));
> > +
> > +     /*
> > +      * Remove task from the pushable list as we try only once after
> > +      * task has been put back in enqueued list.
> > +      */
> > +     plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks);
> > +
> > +     return p;
> > +}
> > +
> > +/*
> > + * See if the non running fair tasks on this rq
> > + * can be sent to some other CPU that fits better with
> > + * their profile.
> > + */
> > +static int push_fair_task(struct rq *rq)
> > +{
> > +     struct task_struct *next_task;
> > +     struct rq *new_rq;
> > +     int prev_cpu, new_cpu;
> > +     int ret = 0;
> > +
> > +     next_task = pick_next_pushable_fair_task(rq);
> > +     if (!next_task)
> > +             return 0;
> > +
> > +     if (is_migration_disabled(next_task))
> > +             return 0;
> > +
> > +     if (WARN_ON(next_task == rq->curr))
> > +             return 0;
> > +
> > +     /* We might release rq lock */
> > +     get_task_struct(next_task);
> > +
> > +     prev_cpu = rq->cpu;
> > +
> > +     new_cpu = find_energy_efficient_cpu(next_task, prev_cpu);
> > +
> > +     if (new_cpu == prev_cpu)
> > +             goto out;
> > +
> > +     new_rq = cpu_rq(new_cpu);
> > +
> > +     if (double_lock_balance(rq, new_rq)) {
>
>
> I think it might be necessary to check the following:
>    if (task_cpu(next_task) != rq->cpu) {
>      double_unlock_balance(rq, new_rq);

Yes good point

>      goto out;
>    }
>
> Indeed I've been hitting the following warnings:
> - uclamp_rq_dec_id():SCHED_WARN_ON(!bucket->tasks)
> - set_task_cpu()::WARN_ON_ONCE(state == TASK_RUNNING &&
>                      p->sched_class == &fair_sched_class &&
>                      (p->on_rq && !task_on_rq_migrating(p)))
> - update_entity_lag()::SCHED_WARN_ON(!se->on_rq)
>
> and it seemed to be caused by the task not being on the initial rq anymore.

Do you have a particular use case to trigger this? I haven't faced
this in the various stress tests that I did.


>
> > +
> > +             deactivate_task(rq, next_task, 0);
> > +             set_task_cpu(next_task, new_cpu);
> > +             activate_task(new_rq, next_task, 0);
> > +             ret = 1;
> > +
> > +             resched_curr(new_rq);
> > +
> > +             double_unlock_balance(rq, new_rq);
> > +     }
> > +
> > +out:
> > +     put_task_struct(next_task);
> > +
> > +     return ret;
> > +}
> > +
>
> Regards,
> Pierre
Re: [RFC PATCH 5/5] sched/fair: Add push task callback for EAS
Posted by Pierre Gondois 1 year, 3 months ago
Hello Vincent,

On 9/12/24 14:30, Vincent Guittot wrote:
> Hello Pierre,
> 
> On Wed, 11 Sept 2024 at 16:03, Pierre Gondois <pierre.gondois@arm.com> wrote:

[snip]

>> I think it might be necessary to check the following:
>>     if (task_cpu(next_task) != rq->cpu) {
>>       double_unlock_balance(rq, new_rq);
> 
> Yes good point
> 
>>       goto out;
>>     }
>>
>> Indeed I've been hitting the following warnings:
>> - uclamp_rq_dec_id():SCHED_WARN_ON(!bucket->tasks)
>> - set_task_cpu()::WARN_ON_ONCE(state == TASK_RUNNING &&
>>                       p->sched_class == &fair_sched_class &&
>>                       (p->on_rq && !task_on_rq_migrating(p)))
>> - update_entity_lag()::SCHED_WARN_ON(!se->on_rq)
>>
>> and it seemed to be caused by the task not being on the initial rq anymore.
> 
> Do you have a particular use case to trigger this? I haven't faced
> this in the various stress tests that I did.

It was triggered by this workload, but it was on a Pixel6 device,
so there might be more background activity:
- 8 tasks with: [UCLAMP_MIN:0, UCLAMP_MAX:1, duty_cycle:100%, period:16ms]
- 4 tasks each bound to a little CPU with: [UCLAMP_MIN:0, UCLAMP_MAX:1024, duty_cycle:4%, period:4ms]

Regards,
Pierre
Re: [RFC PATCH 5/5] sched/fair: Add push task callback for EAS
Posted by Vincent Guittot 1 year, 2 months ago
On Fri, 13 Sept 2024 at 11:10, Pierre Gondois <pierre.gondois@arm.com> wrote:
>
> Hello Vincent,
>
> On 9/12/24 14:30, Vincent Guittot wrote:

[snip]

> >> I think it might be necessary to check the following:
> >>     if (task_cpu(next_task) != rq->cpu) {
> >>       double_unlock_balance(rq, new_rq);
> >
> > Yes good point
> >
> >>       goto out;
> >>     }
> >>
> >> Indeed I've been hitting the following warnings:
> >> - uclamp_rq_dec_id():SCHED_WARN_ON(!bucket->tasks)
> >> - set_task_cpu()::WARN_ON_ONCE(state == TASK_RUNNING &&
> >>                       p->sched_class == &fair_sched_class &&
> >>                       (p->on_rq && !task_on_rq_migrating(p)))
> >> - update_entity_lag()::SCHED_WARN_ON(!se->on_rq)
> >>
> >> and it seemed to be caused by the task not being on the initial rq anymore.
> >
> > Do you have a particular use case to trigger this? I haven't faced
> > this in the various stress tests that I did.
>
> It was triggered by this workload, but it was on a Pixel6 device,
> so there might be more background activity:
> - 8 tasks with: [UCLAMP_MIN:0, UCLAMP_MAX:1, duty_cycle:100%, period:16ms]
> - 4 tasks each bound to a little CPU with: [UCLAMP_MIN:0, UCLAMP_MAX:1024, duty_cycle:4%, period:4ms]

Thanks, I'm going to have a closer look.

>
> Regards,
> Pierre
Re: [RFC PATCH 5/5] sched/fair: Add push task callback for EAS
Posted by Christian Loehle 1 year, 3 months ago
On 8/30/24 14:03, Vincent Guittot wrote:
> EAS is based on wakeup events to efficiently place tasks on the system, but
> there are cases where a task will not have wakeup events anymore, or at far
> too low a pace. In such situations, we can take advantage of the task being
> put back in the enqueued list to check whether it should be migrated to
> another CPU. When the task is the only one running on the CPU, the tick
> will check whether the task is stuck on this CPU and should migrate to
> another one.
>
> Wakeup events remain the main way to migrate tasks, but we now detect
> situations where a task is stuck on a CPU by checking that its utilization
> is larger than the max available compute capacity (max CPU capacity or
> uclamp max setting).

Let me think out loud about this and feel free to object:
If there are other tasks on the rq, we don't have that problem; if it is the
only one running, its utilization should be 1024, and misfit should take care
of the upmigration on the way up.
If the task utilization is 1024 it needs to be alone on the rq, so why would
another CPU be more efficient in that case (which presumably is an idle
CPU of the same PD)?
Or is this patch just for UCLAMP_MAX < 1024 cases altogether?
Re: [RFC PATCH 5/5] sched/fair: Add push task callback for EAS
Posted by Vincent Guittot 1 year, 3 months ago
On Mon, 9 Sept 2024 at 11:59, Christian Loehle <christian.loehle@arm.com> wrote:
>
> On 8/30/24 14:03, Vincent Guittot wrote:
> > EAS is based on wakeup events to efficiently place tasks on the system, but
> > there are cases where a task will not have wakeup events anymore, or at far
> > too low a pace. In such situations, we can take advantage of the task being
> > put back in the enqueued list to check whether it should be migrated to
> > another CPU. When the task is the only one running on the CPU, the tick
> > will check whether the task is stuck on this CPU and should migrate to
> > another one.
> >
> > Wakeup events remain the main way to migrate tasks, but we now detect
> > situations where a task is stuck on a CPU by checking that its utilization
> > is larger than the max available compute capacity (max CPU capacity or
> > uclamp max setting).
>
> Let me think out loud about this and feel free to object:
> If there are other tasks on the rq, we don't have that problem; if it is the

You might have been confused by the term "utilization" in the commit
message, which includes both the util_avg and runnable_avg of the task,
and by "the max cpu capacity", which is get_actual_cpu_capacity().

> only one running, its utilization should be 1024, and misfit should take care
> of the upmigration on the way up.
> If the task utilization is 1024 it needs to be alone on the rq, so why would
> another CPU be more efficient in that case (which presumably is an idle
> CPU of the same PD)?
> Or is this patch just for UCLAMP_MAX < 1024 cases altogether?

For a task alone stuck on a CPU, it's for the uclamp_max case, although
this might also replace the misfit task behavior in the future.
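
To make the uclamp_max case concrete, here is the check from
task_misfit_cpu() with made-up numbers (illustration only):

	/* 100% duty-cycle task with UCLAMP_MAX = 1, alone on a little CPU: */
	util     = max(task_util_est(p), task_runnable(p));	/* -> ~1024 */
	max_capa = min(get_actual_cpu_capacity(cpu),		/* e.g. 160 */
		       uclamp_eff_value(p, UCLAMP_MAX));	/* -> 1 */

	/*
	 * util > max_capa, so the task is detected as stuck: it never
	 * sleeps, gets no wakeup and thus never runs feec(); only the
	 * tick/push path can move it.
	 */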