Drop the sched_is_eas_possible() guard that rejects EAS whenever SMT is
active. This allows EAS to be enabled and perf-domain setup to succeed on
SD_ASYM_CPUCAPACITY topologies with SMT enabled.
Moreover, apply to find_energy_efficient_cpu() the same SMT-aware
preference as the non-EAS wakeup path: when SMT is active and there is a
fully-idle core in the relevant domain, prefer max-spare-capacity
candidates on fully-idle cores. Otherwise, fall back to the prior
behavior, which also considers partially-idle SMT siblings.
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Christian Loehle <christian.loehle@arm.com>
Cc: Koba Ko <kobak@nvidia.com>
Reported-by: Felix Abecassis <fabecassis@nvidia.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
---
kernel/sched/fair.c | 50 +++++++++++++++++++++++++++++++++++++++--
kernel/sched/topology.c | 9 --------
2 files changed, 48 insertions(+), 11 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f8deaaa5bfc85..593a89f688679 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8658,13 +8658,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
eenv_task_busy_time(&eenv, p, prev_cpu);
for (; pd; pd = pd->next) {
- unsigned long util_min = p_util_min, util_max = p_util_max;
unsigned long cpu_cap, cpu_actual_cap, util;
long prev_spare_cap = -1, max_spare_cap = -1;
+ long max_spare_cap_fallback = -1;
unsigned long rq_util_min, rq_util_max;
unsigned long cur_delta, base_energy;
- int max_spare_cap_cpu = -1;
+ int max_spare_cap_cpu = -1, max_spare_cap_cpu_fallback = -1;
int fits, max_fits = -1;
+ int max_fits_fallback = -1;
+ bool prefer_idle_cores;
if (!cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask))
continue;
@@ -8676,6 +8678,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
eenv.cpu_cap = cpu_actual_cap;
eenv.pd_cap = 0;
+ prefer_idle_cores = sched_smt_active() && test_idle_cores(prev_cpu);
+
for_each_cpu(cpu, cpus) {
struct rq *rq = cpu_rq(cpu);
@@ -8687,6 +8691,11 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
+ if (prefer_idle_cores && cpu != prev_cpu && !is_core_idle(cpu))
+ goto fallback;
+
+ unsigned long util_min = p_util_min, util_max = p_util_max;
+
util = cpu_util(cpu, p, cpu, 0);
cpu_cap = capacity_of(cpu);
@@ -8733,6 +8742,43 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
max_spare_cap_cpu = cpu;
max_fits = fits;
}
+
+fallback:
+ if (!prefer_idle_cores || cpu == prev_cpu || is_core_idle(cpu))
+ continue;
+
+ util_min = p_util_min;
+ util_max = p_util_max;
+ util = cpu_util(cpu, p, cpu, 0);
+ cpu_cap = capacity_of(cpu);
+
+ if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
+ rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
+ rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
+
+ util_min = max(rq_util_min, p_util_min);
+ util_max = max(rq_util_max, p_util_max);
+ }
+
+ fits = util_fits_cpu(util, util_min, util_max, cpu);
+ if (!fits)
+ continue;
+
+ lsub_positive(&cpu_cap, util);
+
+ if ((fits > max_fits_fallback) ||
+ ((fits == max_fits_fallback) &&
+ ((long)cpu_cap > max_spare_cap_fallback))) {
+ max_spare_cap_fallback = cpu_cap;
+ max_spare_cap_cpu_fallback = cpu;
+ max_fits_fallback = fits;
+ }
+ }
+
+ if (max_spare_cap_cpu < 0 && max_spare_cap_cpu_fallback >= 0) {
+ max_spare_cap = max_spare_cap_fallback;
+ max_spare_cap_cpu = max_spare_cap_cpu_fallback;
+ max_fits = max_fits_fallback;
}
if (max_spare_cap_cpu < 0 && prev_spare_cap < 0)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 061f8c85f5552..cb060fe56aec1 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -232,15 +232,6 @@ static bool sched_is_eas_possible(const struct cpumask *cpu_mask)
return false;
}
- /* EAS definitely does *not* handle SMT */
- if (sched_smt_active()) {
- if (sched_debug()) {
- pr_info("rd %*pbl: Checking EAS, SMT is not supported\n",
- cpumask_pr_args(cpu_mask));
- }
- return false;
- }
-
if (!arch_scale_freq_invariant()) {
if (sched_debug()) {
pr_info("rd %*pbl: Checking EAS: frequency-invariant load tracking not yet supported",
--
2.53.0
On Thu, 26 Mar 2026 at 16:12, Andrea Righi <arighi@nvidia.com> wrote:
>
> Drop the sched_is_eas_possible() guard that rejects EAS whenever SMT is
> active. This allows to enable EAS and perf-domain setup to succeed on
> SD_ASYM_CPUCAPACITY topologies with SMT enabled.
I don't think that we want to enable EAS with SMT. So keep EAS and SMT
exclusive, at least for now
>
> Moreover, apply to find_energy_efficient_cpu() the same SMT-aware
> preference as the non-EAS wakeup path: when SMT is active and there is a
> fully-idle core in the relevant domain, prefer max-spare-capacity
> candidates on fully-idle cores. Otherwise, fall back to the prior
> behavior, to include also partially-idle SMT siblings.
>
> Cc: Vincent Guittot <vincent.guittot@linaro.org>
> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Cc: Christian Loehle <christian.loehle@arm.com>
> Cc: Koba Ko <kobak@nvidia.com>
> Reported-by: Felix Abecassis <fabecassis@nvidia.com>
> Signed-off-by: Andrea Righi <arighi@nvidia.com>
> ---
> kernel/sched/fair.c | 50 +++++++++++++++++++++++++++++++++++++++--
> kernel/sched/topology.c | 9 --------
> 2 files changed, 48 insertions(+), 11 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index f8deaaa5bfc85..593a89f688679 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8658,13 +8658,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> eenv_task_busy_time(&eenv, p, prev_cpu);
>
> for (; pd; pd = pd->next) {
> - unsigned long util_min = p_util_min, util_max = p_util_max;
> unsigned long cpu_cap, cpu_actual_cap, util;
> long prev_spare_cap = -1, max_spare_cap = -1;
> + long max_spare_cap_fallback = -1;
> unsigned long rq_util_min, rq_util_max;
> unsigned long cur_delta, base_energy;
> - int max_spare_cap_cpu = -1;
> + int max_spare_cap_cpu = -1, max_spare_cap_cpu_fallback = -1;
> int fits, max_fits = -1;
> + int max_fits_fallback = -1;
> + bool prefer_idle_cores;
>
> if (!cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask))
> continue;
> @@ -8676,6 +8678,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> eenv.cpu_cap = cpu_actual_cap;
> eenv.pd_cap = 0;
>
> + prefer_idle_cores = sched_smt_active() && test_idle_cores(prev_cpu);
> +
> for_each_cpu(cpu, cpus) {
> struct rq *rq = cpu_rq(cpu);
>
> @@ -8687,6 +8691,11 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> continue;
>
> + if (prefer_idle_cores && cpu != prev_cpu && !is_core_idle(cpu))
> + goto fallback;
> +
> + unsigned long util_min = p_util_min, util_max = p_util_max;
> +
> util = cpu_util(cpu, p, cpu, 0);
> cpu_cap = capacity_of(cpu);
>
> @@ -8733,6 +8742,43 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> max_spare_cap_cpu = cpu;
> max_fits = fits;
> }
> +
> +fallback:
> + if (!prefer_idle_cores || cpu == prev_cpu || is_core_idle(cpu))
> + continue;
> +
> + util_min = p_util_min;
> + util_max = p_util_max;
> + util = cpu_util(cpu, p, cpu, 0);
> + cpu_cap = capacity_of(cpu);
> +
> + if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
> + rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
> + rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
> +
> + util_min = max(rq_util_min, p_util_min);
> + util_max = max(rq_util_max, p_util_max);
> + }
> +
> + fits = util_fits_cpu(util, util_min, util_max, cpu);
> + if (!fits)
> + continue;
> +
> + lsub_positive(&cpu_cap, util);
> +
> + if ((fits > max_fits_fallback) ||
> + ((fits == max_fits_fallback) &&
> + ((long)cpu_cap > max_spare_cap_fallback))) {
> + max_spare_cap_fallback = cpu_cap;
> + max_spare_cap_cpu_fallback = cpu;
> + max_fits_fallback = fits;
> + }
> + }
> +
> + if (max_spare_cap_cpu < 0 && max_spare_cap_cpu_fallback >= 0) {
> + max_spare_cap = max_spare_cap_fallback;
> + max_spare_cap_cpu = max_spare_cap_cpu_fallback;
> + max_fits = max_fits_fallback;
> }
>
> if (max_spare_cap_cpu < 0 && prev_spare_cap < 0)
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 061f8c85f5552..cb060fe56aec1 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -232,15 +232,6 @@ static bool sched_is_eas_possible(const struct cpumask *cpu_mask)
> return false;
> }
>
> - /* EAS definitely does *not* handle SMT */
> - if (sched_smt_active()) {
> - if (sched_debug()) {
> - pr_info("rd %*pbl: Checking EAS, SMT is not supported\n",
> - cpumask_pr_args(cpu_mask));
> - }
> - return false;
> - }
> -
> if (!arch_scale_freq_invariant()) {
> if (sched_debug()) {
> pr_info("rd %*pbl: Checking EAS: frequency-invariant load tracking not yet supported",
> --
> 2.53.0
>
On Fri, Mar 27, 2026 at 09:09:35AM +0100, Vincent Guittot wrote:
> On Thu, 26 Mar 2026 at 16:12, Andrea Righi <arighi@nvidia.com> wrote:
> >
> > Drop the sched_is_eas_possible() guard that rejects EAS whenever SMT is
> > active. This allows to enable EAS and perf-domain setup to succeed on
> > SD_ASYM_CPUCAPACITY topologies with SMT enabled.
>
> I don't think that we want to enable EAS with SMT. So keep EAS and SMT
> exclusive, at least for now
Ack.
Thanks,
-Andrea
>
>
> >
> > Moreover, apply to find_energy_efficient_cpu() the same SMT-aware
> > preference as the non-EAS wakeup path: when SMT is active and there is a
> > fully-idle core in the relevant domain, prefer max-spare-capacity
> > candidates on fully-idle cores. Otherwise, fall back to the prior
> > behavior, to include also partially-idle SMT siblings.
> >
> > Cc: Vincent Guittot <vincent.guittot@linaro.org>
> > Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> > Cc: Christian Loehle <christian.loehle@arm.com>
> > Cc: Koba Ko <kobak@nvidia.com>
> > Reported-by: Felix Abecassis <fabecassis@nvidia.com>
> > Signed-off-by: Andrea Righi <arighi@nvidia.com>
> > ---
> > kernel/sched/fair.c | 50 +++++++++++++++++++++++++++++++++++++++--
> > kernel/sched/topology.c | 9 --------
> > 2 files changed, 48 insertions(+), 11 deletions(-)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index f8deaaa5bfc85..593a89f688679 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -8658,13 +8658,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > eenv_task_busy_time(&eenv, p, prev_cpu);
> >
> > for (; pd; pd = pd->next) {
> > - unsigned long util_min = p_util_min, util_max = p_util_max;
> > unsigned long cpu_cap, cpu_actual_cap, util;
> > long prev_spare_cap = -1, max_spare_cap = -1;
> > + long max_spare_cap_fallback = -1;
> > unsigned long rq_util_min, rq_util_max;
> > unsigned long cur_delta, base_energy;
> > - int max_spare_cap_cpu = -1;
> > + int max_spare_cap_cpu = -1, max_spare_cap_cpu_fallback = -1;
> > int fits, max_fits = -1;
> > + int max_fits_fallback = -1;
> > + bool prefer_idle_cores;
> >
> > if (!cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask))
> > continue;
> > @@ -8676,6 +8678,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > eenv.cpu_cap = cpu_actual_cap;
> > eenv.pd_cap = 0;
> >
> > + prefer_idle_cores = sched_smt_active() && test_idle_cores(prev_cpu);
> > +
> > for_each_cpu(cpu, cpus) {
> > struct rq *rq = cpu_rq(cpu);
> >
> > @@ -8687,6 +8691,11 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> > continue;
> >
> > + if (prefer_idle_cores && cpu != prev_cpu && !is_core_idle(cpu))
> > + goto fallback;
> > +
> > + unsigned long util_min = p_util_min, util_max = p_util_max;
> > +
> > util = cpu_util(cpu, p, cpu, 0);
> > cpu_cap = capacity_of(cpu);
> >
> > @@ -8733,6 +8742,43 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > max_spare_cap_cpu = cpu;
> > max_fits = fits;
> > }
> > +
> > +fallback:
> > + if (!prefer_idle_cores || cpu == prev_cpu || is_core_idle(cpu))
> > + continue;
> > +
> > + util_min = p_util_min;
> > + util_max = p_util_max;
> > + util = cpu_util(cpu, p, cpu, 0);
> > + cpu_cap = capacity_of(cpu);
> > +
> > + if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
> > + rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
> > + rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
> > +
> > + util_min = max(rq_util_min, p_util_min);
> > + util_max = max(rq_util_max, p_util_max);
> > + }
> > +
> > + fits = util_fits_cpu(util, util_min, util_max, cpu);
> > + if (!fits)
> > + continue;
> > +
> > + lsub_positive(&cpu_cap, util);
> > +
> > + if ((fits > max_fits_fallback) ||
> > + ((fits == max_fits_fallback) &&
> > + ((long)cpu_cap > max_spare_cap_fallback))) {
> > + max_spare_cap_fallback = cpu_cap;
> > + max_spare_cap_cpu_fallback = cpu;
> > + max_fits_fallback = fits;
> > + }
> > + }
> > +
> > + if (max_spare_cap_cpu < 0 && max_spare_cap_cpu_fallback >= 0) {
> > + max_spare_cap = max_spare_cap_fallback;
> > + max_spare_cap_cpu = max_spare_cap_cpu_fallback;
> > + max_fits = max_fits_fallback;
> > }
> >
> > if (max_spare_cap_cpu < 0 && prev_spare_cap < 0)
> > diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> > index 061f8c85f5552..cb060fe56aec1 100644
> > --- a/kernel/sched/topology.c
> > +++ b/kernel/sched/topology.c
> > @@ -232,15 +232,6 @@ static bool sched_is_eas_possible(const struct cpumask *cpu_mask)
> > return false;
> > }
> >
> > - /* EAS definitely does *not* handle SMT */
> > - if (sched_smt_active()) {
> > - if (sched_debug()) {
> > - pr_info("rd %*pbl: Checking EAS, SMT is not supported\n",
> > - cpumask_pr_args(cpu_mask));
> > - }
> > - return false;
> > - }
> > -
> > if (!arch_scale_freq_invariant()) {
> > if (sched_debug()) {
> > pr_info("rd %*pbl: Checking EAS: frequency-invariant load tracking not yet supported",
> > --
> > 2.53.0
> >
© 2016 - 2026 Red Hat, Inc.