EAS is based on wakeup events to efficiently place tasks on the system, but
there are cases where a task no longer gets wakeup events, or gets them at
far too low a pace. For such cases, we check whether it is worth pushing the
task to another CPU instead of putting it back in the enqueued list.

Wakeup events remain the main way to migrate tasks, but we now detect
situations where a task is stuck on a CPU by checking that its utilization
is larger than the max available compute capacity (max CPU capacity or
uclamp max setting).

When the system becomes overutilized and some CPUs are idle, we try to
push tasks instead of waiting for the periodic load balance.
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
kernel/sched/fair.c | 65 +++++++++++++++++++++++++++++++++++++++++
kernel/sched/topology.c | 3 ++
2 files changed, 68 insertions(+)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9af8d0a61856..e9e1d0c05805 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6990,6 +6990,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}
static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
+
/*
* Basically dequeue_task_fair(), except it can deal with dequeue_entity()
* failing half-way through and resume the dequeue later.
@@ -8499,8 +8500,72 @@ static inline bool sched_push_task_enabled(void)
return static_branch_unlikely(&sched_push_task);
}
+static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
+{
+ unsigned long max_capa, util;
+
+ max_capa = min(get_actual_cpu_capacity(cpu),
+ uclamp_eff_value(p, UCLAMP_MAX));
+ util = max(task_util_est(p), task_runnable(p));
+
+ /*
+ * Return true only if the task might not sleep/wakeup because of a low
+ * compute capacity. Tasks, which wake up regularly, will be handled by
+ * feec().
+ */
+ return (util > max_capa);
+}
+
+static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
+{
+ if (!sched_energy_enabled())
+ return false;
+
+ if (is_rd_overutilized(rq->rd))
+ return false;
+
+ if (task_stuck_on_cpu(p, cpu_of(rq)))
+ return true;
+
+ if (!task_fits_cpu(p, cpu_of(rq)))
+ return true;
+
+ return false;
+}
+
+static inline bool sched_idle_push_task(struct task_struct *p, struct rq *rq)
+{
+ if (rq->nr_running == 1)
+ return false;
+
+ if (!is_rd_overutilized(rq->rd))
+ return false;
+
+ /* If there are idle cpus in the llc then try to push the task on it */
+ if (test_idle_cores(cpu_of(rq)))
+ return true;
+
+ return false;
+}
+
+
static bool fair_push_task(struct rq *rq, struct task_struct *p)
{
+ if (!task_on_rq_queued(p))
+ return false;
+
+ if (p->se.sched_delayed)
+ return false;
+
+ if (p->nr_cpus_allowed == 1)
+ return false;
+
+ if (sched_energy_push_task(p, rq))
+ return true;
+
+ if (sched_idle_push_task(p, rq))
+ return true;
+
return false;
}
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index cf643a5ddedd..5edf7b117ed9 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -391,10 +391,13 @@ static void sched_energy_set(bool has_eas)
if (sched_debug())
pr_info("%s: stopping EAS\n", __func__);
static_branch_disable_cpuslocked(&sched_energy_present);
+ static_branch_dec_cpuslocked(&sched_push_task);
+ } else if (has_eas && !sched_energy_enabled()) {
} else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
if (sched_debug())
pr_info("%s: starting EAS\n", __func__);
static_branch_enable_cpuslocked(&sched_energy_present);
+ static_branch_inc_cpuslocked(&sched_push_task);
}
}
--
2.43.0
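
Purely as an illustration (not part of the patch), below is a minimal
stand-alone sketch of the condition that task_stuck_on_cpu() implements: a
task is treated as stuck when its utilization exceeds the smaller of the
CPU's available capacity and its effective uclamp max. All capacity and
utilization numbers are hypothetical, and the kernel helpers
(get_actual_cpu_capacity(), uclamp_eff_value(), task_util_est(),
task_runnable()) are replaced by plain parameters here.

#include <stdbool.h>
#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

/* Stand-alone sketch of the check done by task_stuck_on_cpu(). */
static bool task_stuck(unsigned long cpu_capacity, unsigned long uclamp_max,
		       unsigned long util_est, unsigned long runnable)
{
	unsigned long max_capa = MIN(cpu_capacity, uclamp_max);
	unsigned long util = MAX(util_est, runnable);

	/* The task demands more compute than this CPU can deliver. */
	return util > max_capa;
}

int main(void)
{
	/*
	 * A task whose util_est (~700) was built while running on a big CPU,
	 * now sitting on a little CPU with an available capacity of 446 and
	 * no uclamp max restriction (1024): min(446, 1024) = 446 < 700, so
	 * the task is reported as stuck.
	 */
	printf("stuck: %d\n", task_stuck(446, 1024, 700, 650));

	/*
	 * A task capped by uclamp max = 256 on a big CPU (capacity 1024)
	 * whose runnable contribution reaches 512: min(1024, 256) = 256 <
	 * 512, stuck as well.
	 */
	printf("stuck: %d\n", task_stuck(1024, 256, 300, 512));

	return 0;
}

In both cases the task is unlikely to sleep and wake up again on its own, so
feec() never gets a chance to re-evaluate its placement; these are exactly
the tasks the push path above is meant to catch.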
Some nits below for now
On 12/1/25 09:13, Vincent Guittot wrote:
> EAS is based on wakeup events to efficiently place tasks on the system, but
> there are cases where a task doesn't have wakeup events anymore or at a far
> too low pace. For such cases, we check if it's worht pushing hte task on
worth
the
> another CPUs instead of putting it back in the enqueued list.
>
> Wake up events remain the main way to migrate tasks but we now detect
> situation where a task is stuck on a CPU by checking that its utilization
> is larger than the max available compute capacity (max cpu capacity or
> uclamp max setting)
>
> When the system becomes overutilized and some CPUs are idle, we try to
> push tasks instead of waiting periodic load balance.
>
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
> kernel/sched/fair.c | 65 +++++++++++++++++++++++++++++++++++++++++
> kernel/sched/topology.c | 3 ++
> 2 files changed, 68 insertions(+)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 9af8d0a61856..e9e1d0c05805 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6990,6 +6990,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> }
>
> static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
> +
This doesn't belong here
> /*
> * Basically dequeue_task_fair(), except it can deal with dequeue_entity()
> * failing half-way through and resume the dequeue later.
> @@ -8499,8 +8500,72 @@ static inline bool sched_push_task_enabled(void)
> return static_branch_unlikely(&sched_push_task);
> }
>
> +static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
> +{
> + unsigned long max_capa, util;
> +
> + max_capa = min(get_actual_cpu_capacity(cpu),
> + uclamp_eff_value(p, UCLAMP_MAX));
> + util = max(task_util_est(p), task_runnable(p));
> +
> + /*
> + * Return true only if the task might not sleep/wakeup because of a low
> + * compute capacity. Tasks, which wake up regularly, will be handled by
> + * feec().
> + */
> + return (util > max_capa);
> +}
> +
> +static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
> +{
> + if (!sched_energy_enabled())
> + return false;
> +
> + if (is_rd_overutilized(rq->rd))
> + return false;
> +
> + if (task_stuck_on_cpu(p, cpu_of(rq)))
> + return true;
> +
> + if (!task_fits_cpu(p, cpu_of(rq)))
> + return true;
> +
> + return false;
> +}
> +
> +static inline bool sched_idle_push_task(struct task_struct *p, struct rq *rq)
> +{
> + if (rq->nr_running == 1)
> + return false;
> +
> + if (!is_rd_overutilized(rq->rd))
> + return false;
> +
> + /* If there are idle cpus in the llc then try to push the task on it */
> + if (test_idle_cores(cpu_of(rq)))
> + return true;
> +
> + return false;
> +}
> +
> +
> static bool fair_push_task(struct rq *rq, struct task_struct *p)
> {
> + if (!task_on_rq_queued(p))
> + return false;
> +
> + if (p->se.sched_delayed)
> + return false;
> +
> + if (p->nr_cpus_allowed == 1)
> + return false;
> +
> + if (sched_energy_push_task(p, rq))
> + return true;
> +
> + if (sched_idle_push_task(p, rq))
> + return true;
> +
> return false;
> }
>
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index cf643a5ddedd..5edf7b117ed9 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -391,10 +391,13 @@ static void sched_energy_set(bool has_eas)
> if (sched_debug())
> pr_info("%s: stopping EAS\n", __func__);
> static_branch_disable_cpuslocked(&sched_energy_present);
> + static_branch_dec_cpuslocked(&sched_push_task);
> + } else if (has_eas && !sched_energy_enabled()) {
> } else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
This could just be (has_eas && sched_energy_enabled() && !static_branch_unlikely(&sched_energy_present))
to avoid the awkward else if above
> if (sched_debug())
> pr_info("%s: starting EAS\n", __func__);
> static_branch_enable_cpuslocked(&sched_energy_present);
> + static_branch_inc_cpuslocked(&sched_push_task);
> }
> }
>
On Mon, 1 Dec 2025 at 14:53, Christian Loehle <christian.loehle@arm.com> wrote:
>
> Some nits below for now
>
> On 12/1/25 09:13, Vincent Guittot wrote:
> > EAS is based on wakeup events to efficiently place tasks on the system, but
> > there are cases where a task doesn't have wakeup events anymore or at a far
> > too low pace. For such cases, we check if it's worht pushing hte task on
>
> worth
> the
+1
>
> > another CPUs instead of putting it back in the enqueued list.
> >
> > Wake up events remain the main way to migrate tasks but we now detect
> > situation where a task is stuck on a CPU by checking that its utilization
> > is larger than the max available compute capacity (max cpu capacity or
> > uclamp max setting)
> >
> > When the system becomes overutilized and some CPUs are idle, we try to
> > push tasks instead of waiting periodic load balance.
> >
> > Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> > ---
> > kernel/sched/fair.c | 65 +++++++++++++++++++++++++++++++++++++++++
> > kernel/sched/topology.c | 3 ++
> > 2 files changed, 68 insertions(+)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 9af8d0a61856..e9e1d0c05805 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -6990,6 +6990,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > }
> >
> > static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
> > +
>
> This doesn't belong here
yes, I don't know what I messed up with my patches
>
> > /*
> > * Basically dequeue_task_fair(), except it can deal with dequeue_entity()
> > * failing half-way through and resume the dequeue later.
> > @@ -8499,8 +8500,72 @@ static inline bool sched_push_task_enabled(void)
> > return static_branch_unlikely(&sched_push_task);
> > }
> >
> > +static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
> > +{
> > + unsigned long max_capa, util;
> > +
> > + max_capa = min(get_actual_cpu_capacity(cpu),
> > + uclamp_eff_value(p, UCLAMP_MAX));
> > + util = max(task_util_est(p), task_runnable(p));
> > +
> > + /*
> > + * Return true only if the task might not sleep/wakeup because of a low
> > + * compute capacity. Tasks, which wake up regularly, will be handled by
> > + * feec().
> > + */
> > + return (util > max_capa);
> > +}
> > +
> > +static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
> > +{
> > + if (!sched_energy_enabled())
> > + return false;
> > +
> > + if (is_rd_overutilized(rq->rd))
> > + return false;
> > +
> > + if (task_stuck_on_cpu(p, cpu_of(rq)))
> > + return true;
> > +
> > + if (!task_fits_cpu(p, cpu_of(rq)))
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +static inline bool sched_idle_push_task(struct task_struct *p, struct rq *rq)
> > +{
> > + if (rq->nr_running == 1)
> > + return false;
> > +
> > + if (!is_rd_overutilized(rq->rd))
> > + return false;
> > +
> > + /* If there are idle cpus in the llc then try to push the task on it */
> > + if (test_idle_cores(cpu_of(rq)))
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +
> > static bool fair_push_task(struct rq *rq, struct task_struct *p)
> > {
> > + if (!task_on_rq_queued(p))
> > + return false;
> > +
> > + if (p->se.sched_delayed)
> > + return false;
> > +
> > + if (p->nr_cpus_allowed == 1)
> > + return false;
> > +
> > + if (sched_energy_push_task(p, rq))
> > + return true;
> > +
> > + if (sched_idle_push_task(p, rq))
> > + return true;
> > +
> > return false;
> > }
> >
> > diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> > index cf643a5ddedd..5edf7b117ed9 100644
> > --- a/kernel/sched/topology.c
> > +++ b/kernel/sched/topology.c
> > @@ -391,10 +391,13 @@ static void sched_energy_set(bool has_eas)
> > if (sched_debug())
> > pr_info("%s: stopping EAS\n", __func__);
> > static_branch_disable_cpuslocked(&sched_energy_present);
> > + static_branch_dec_cpuslocked(&sched_push_task);
> > + } else if (has_eas && !sched_energy_enabled()) {
> > } else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
>
> > This could just be (has_eas && sched_energy_enabled() && !static_branch_unlikely(&sched_energy_present))
> to avoid the awkward else if above
Argh, I messed up something with this patchset and another pending
cleanup patch when I rebased it.
It should be:
static_branch_disable_cpuslocked(&sched_energy_present);
+ static_branch_dec_cpuslocked(&sched_push_task);
} else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
I need to rerun the bench to check that the results of the cover
letter are still correct.
That's what happens when you want to send a patchset too quickly ...
>
> > if (sched_debug())
> > pr_info("%s: starting EAS\n", __func__);
> > static_branch_enable_cpuslocked(&sched_energy_present);
> > + static_branch_inc_cpuslocked(&sched_push_task);
> > }
> > }
> >
>
On Mon, 1 Dec 2025 at 18:49, Vincent Guittot <vincent.guittot@linaro.org> wrote:
>
> On Mon, 1 Dec 2025 at 14:53, Christian Loehle <christian.loehle@arm.com> wrote:
> >
> > Some nits below for now
> >
> > On 12/1/25 09:13, Vincent Guittot wrote:
> > > EAS is based on wakeup events to efficiently place tasks on the system, but
> > > there are cases where a task doesn't have wakeup events anymore or at a far
> > > too low pace. For such cases, we check if it's worht pushing hte task on
> >
> > worth
> > the
>
> +1
>
> >
> > > another CPUs instead of putting it back in the enqueued list.
> > >
> > > Wake up events remain the main way to migrate tasks but we now detect
> > > situation where a task is stuck on a CPU by checking that its utilization
> > > is larger than the max available compute capacity (max cpu capacity or
> > > uclamp max setting)
> > >
> > > When the system becomes overutilized and some CPUs are idle, we try to
> > > push tasks instead of waiting periodic load balance.
> > >
> > > Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> > > ---
> > > kernel/sched/fair.c | 65 +++++++++++++++++++++++++++++++++++++++++
> > > kernel/sched/topology.c | 3 ++
> > > 2 files changed, 68 insertions(+)
> > >
> > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > > index 9af8d0a61856..e9e1d0c05805 100644
> > > --- a/kernel/sched/fair.c
> > > +++ b/kernel/sched/fair.c
> > > @@ -6990,6 +6990,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > }
> > >
> > > static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p);
> > > +
> >
> > This doesn't belong here
>
> yes, I don't know what I messed up with my patches
>
> >
> > > /*
> > > * Basically dequeue_task_fair(), except it can deal with dequeue_entity()
> > > * failing half-way through and resume the dequeue later.
> > > @@ -8499,8 +8500,72 @@ static inline bool sched_push_task_enabled(void)
> > > return static_branch_unlikely(&sched_push_task);
> > > }
> > >
> > > +static inline bool task_stuck_on_cpu(struct task_struct *p, int cpu)
> > > +{
> > > + unsigned long max_capa, util;
> > > +
> > > + max_capa = min(get_actual_cpu_capacity(cpu),
> > > + uclamp_eff_value(p, UCLAMP_MAX));
> > > + util = max(task_util_est(p), task_runnable(p));
> > > +
> > > + /*
> > > + * Return true only if the task might not sleep/wakeup because of a low
> > > + * compute capacity. Tasks, which wake up regularly, will be handled by
> > > + * feec().
> > > + */
> > > + return (util > max_capa);
> > > +}
> > > +
> > > +static inline bool sched_energy_push_task(struct task_struct *p, struct rq *rq)
> > > +{
> > > + if (!sched_energy_enabled())
> > > + return false;
> > > +
> > > + if (is_rd_overutilized(rq->rd))
> > > + return false;
> > > +
> > > + if (task_stuck_on_cpu(p, cpu_of(rq)))
> > > + return true;
> > > +
> > > + if (!task_fits_cpu(p, cpu_of(rq)))
> > > + return true;
> > > +
> > > + return false;
> > > +}
> > > +
> > > +static inline bool sched_idle_push_task(struct task_struct *p, struct rq *rq)
> > > +{
> > > + if (rq->nr_running == 1)
> > > + return false;
> > > +
> > > + if (!is_rd_overutilized(rq->rd))
> > > + return false;
> > > +
> > > + /* If there are idle cpus in the llc then try to push the task on it */
> > > + if (test_idle_cores(cpu_of(rq)))
> > > + return true;
> > > +
> > > + return false;
> > > +}
> > > +
> > > +
> > > static bool fair_push_task(struct rq *rq, struct task_struct *p)
> > > {
> > > + if (!task_on_rq_queued(p))
> > > + return false;
> > > +
> > > + if (p->se.sched_delayed)
> > > + return false;
> > > +
> > > + if (p->nr_cpus_allowed == 1)
> > > + return false;
> > > +
> > > + if (sched_energy_push_task(p, rq))
> > > + return true;
> > > +
> > > + if (sched_idle_push_task(p, rq))
> > > + return true;
> > > +
> > > return false;
> > > }
> > >
> > > diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> > > index cf643a5ddedd..5edf7b117ed9 100644
> > > --- a/kernel/sched/topology.c
> > > +++ b/kernel/sched/topology.c
> > > @@ -391,10 +391,13 @@ static void sched_energy_set(bool has_eas)
> > > if (sched_debug())
> > > pr_info("%s: stopping EAS\n", __func__);
> > > static_branch_disable_cpuslocked(&sched_energy_present);
> > > + static_branch_dec_cpuslocked(&sched_push_task);
> > > + } else if (has_eas && !sched_energy_enabled()) {
> > > } else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
> >
> > > This could just be (has_eas && sched_energy_enabled() && !static_branch_unlikely(&sched_energy_present))
> > to avoid the awkward else if above
>
> Argh, I messed up something with this patchset and another pending
> cleanup patch when I rebased it.
> It should be :
>
> static_branch_disable_cpuslocked(&sched_energy_present);
> + static_branch_dec_cpuslocked(&sched_push_task);
> } else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
>
> I need to rerun the bench to check that the results of the cover
> letter are still correct.
And the results are now the same
Sorry for the noise, I'm going to fix this in a v8
>
> That's what happens when you want to send a patchset too quickly ...
>
>
> >
> > > if (sched_debug())
> > > pr_info("%s: starting EAS\n", __func__);
> > > static_branch_enable_cpuslocked(&sched_energy_present);
> > > + static_branch_inc_cpuslocked(&sched_push_task);
> > > }
> > > }
> > >
> >