select_task_rq_fair() is always called with p->pi_lock held and IRQs
disabled which makes it the equivalent of an RCU read-side critical section.
Since commit 71fedc41c23b ("sched/fair: Switch to
rcu_dereference_all()") switched to using rcu_dereference_all() in the
wakeup path, drop the explicit rcu_read_{lock,unlock}() in the fair
task's wakeup path.
Future plans to reuse select_task_rq_fair() /
find_energy_efficient_cpu() in the fair class' balance callback will do
so with IRQs disabled and will comply with the requirements of
rcu_dereference_all(), which makes this change safe with future
development plans in mind too.
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
Changelog v3..v4:
o No changes.
---
kernel/sched/fair.c | 33 ++++++++++++---------------------
1 file changed, 12 insertions(+), 21 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d57c02e82f3a..28853c0abb83 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8570,10 +8570,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
struct perf_domain *pd;
struct energy_env eenv;
- rcu_read_lock();
pd = rcu_dereference_all(rd->pd);
if (!pd)
- goto unlock;
+ return target;
/*
* Energy-aware wake-up happens on the lowest sched_domain starting
@@ -8583,13 +8582,13 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
sd = sd->parent;
if (!sd)
- goto unlock;
+ return target;
target = prev_cpu;
sync_entity_load_avg(&p->se);
if (!task_util_est(p) && p_util_min == 0)
- goto unlock;
+ return target;
eenv_task_busy_time(&eenv, p, prev_cpu);
@@ -8684,7 +8683,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
prev_cpu);
/* CPU utilization has changed */
if (prev_delta < base_energy)
- goto unlock;
+ return target;
prev_delta -= base_energy;
prev_actual_cap = cpu_actual_cap;
best_delta = min(best_delta, prev_delta);
@@ -8708,7 +8707,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
max_spare_cap_cpu);
/* CPU utilization has changed */
if (cur_delta < base_energy)
- goto unlock;
+ return target;
cur_delta -= base_energy;
/*
@@ -8725,7 +8724,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
best_actual_cap = cpu_actual_cap;
}
}
- rcu_read_unlock();
if ((best_fits > prev_fits) ||
((best_fits > 0) && (best_delta < prev_delta)) ||
@@ -8733,11 +8731,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
target = best_energy_cpu;
return target;
-
-unlock:
- rcu_read_unlock();
-
- return target;
}
/*
@@ -8782,7 +8775,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
}
- rcu_read_lock();
for_each_domain(cpu, tmp) {
/*
* If both 'cpu' and 'prev_cpu' are part of this domain,
@@ -8808,14 +8800,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
break;
}
- if (unlikely(sd)) {
- /* Slow path */
- new_cpu = sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag);
- } else if (wake_flags & WF_TTWU) { /* XXX always ? */
- /* Fast path */
- new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
- }
- rcu_read_unlock();
+ /* Slow path */
+ if (unlikely(sd))
+ return sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag);
+
+ /* Fast path */
+ if (wake_flags & WF_TTWU)
+ return select_idle_sibling(p, prev_cpu, new_cpu);
return new_cpu;
}
--
2.34.1
On 12.03.26 05:44, K Prateek Nayak wrote:
> select_task_rq_fair() is always called with p->pi_lock held and IRQs
> disabled which makes it equivalent of an RCU read-side.
>
> Since commit 71fedc41c23b ("sched/fair: Switch to
> rcu_dereference_all()") switched to using rcu_dereference_all() in the
> wakeup path, drop the explicit rcu_read_{lock,unlock}() in the fair
> task's wakeup path.
>
> Future plans to reuse select_task_rq_fair() /
> find_energy_efficient_cpu() in the fair class' balance callback will do
> so with IRQs disabled and will comply with the requirements of
> rcu_dereference_all() which makes this safe keeping in mind future
> development plans too.
>
> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
> ---
> Changelog v3..v4:
>
> o No changes.
> ---
> kernel/sched/fair.c | 33 ++++++++++++---------------------
> 1 file changed, 12 insertions(+), 21 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index d57c02e82f3a..28853c0abb83 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8570,10 +8570,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> struct perf_domain *pd;
> struct energy_env eenv;
>
> - rcu_read_lock();
> pd = rcu_dereference_all(rd->pd);
Got an RCU-related warning when running EAS:
[ 3.795872] EM: rcu read lock needed
[ 3.795903] WARNING: ./include/linux/energy_model.h:251 at compute_energy+0x3
a4/0x3bc, CPU#1: swapper/1/0
[ 3.813755] Modules linked in:
[ 3.816844] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 7.0.0-rc2-00295-
g93189edc73c8-dirty #30 PREEMPT
[ 3.826807] Hardware name: ARM Juno development board (r0) (DT)
[ 3.832752] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 3.839750] pc : compute_energy+0x3a4/0x3bc
[ 3.843970] lr : compute_energy+0x3a4/0x3bc
[ 3.848185] sp : ffff8000838f39e0
[ 3.851518] x29: ffff8000838f3a00 x28: ffff0008008fe560 x27: 00000000000000f9
[ 3.858717] x26: ffff800082702250 x25: ffff000803211e00 x24: ffff8000838f3af8
[ 3.865913] x23: ffff0008009a3580 x22: 00000000000000f9 x21: 00000000ffffffff
[ 3.873108] x20: ffff0008030377c0 x19: 00000000000000b7 x18: 0000000000000028
[ 3.880303] x17: ffff8008fc91f000 x16: ffff8000838f0000 x15: 0000000000000007
[ 3.887497] x14: fffffffffffc73af x13: 0a64656465656e20 x12: 0000000000008000
[ 3.894690] x11: ffff80008272ca18 x10: ffff8000825f7000 x9 : 0000000000000050
[ 3.901884] x8 : 0000000000000008 x7 : ffff800080190100 x6 : 0000000000000001
[ 3.909076] x5 : 0000000000000161 x4 : 0000000000000000 x3 : 0000000000000001
[ 3.916268] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0008002e1ac0
[ 3.923461] Call trace:
[ 3.925924] compute_energy+0x3a4/0x3bc (P)
[ 3.930147] select_task_rq_fair+0x590/0x1990
[ 3.934541] try_to_wake_up+0x1f8/0xac4
[ 3.938420] wake_up_process+0x18/0x24
[ 3.942208] process_timeout+0x14/0x20
[ 3.945995] call_timer_fn+0xb8/0x470
We need to adapt em_cpu_energy() in include/linux/energy_model.h as
well.
---8<---
From 74f9067751b02f4bd0934ba6d47f2a204c763abe Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Sun, 15 Mar 2026 23:45:39 +0100
Subject: [PATCH] PM: EM: Switch to rcu_dereference_all() in wakeup path
em_cpu_energy() is part of the EAS (Fair) task wakeup path. Now that
rcu_read_{,un}lock() have been removed from find_energy_efficient_cpu()
switch to rcu_dereference_all() and check for rcu_read_lock_any_held()
in em_cpu_energy() as well.
The EAS (Fair) task wakeup path is a preempt/IRQ-disabled region, so
rcu_read_{,un}lock() can be removed.
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
---
include/linux/energy_model.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index e7497f804644..c909a8ba22e8 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
struct em_perf_state *ps;
int i;
- WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
+ lockdep_assert(rcu_read_lock_any_held());
if (!sum_util)
return 0;
@@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* Find the lowest performance state of the Energy Model above the
* requested performance.
*/
- em_table = rcu_dereference(pd->em_table);
+ em_table = rcu_dereference_all(pd->em_table);
i = em_pd_get_efficient_state(em_table->state, pd, max_util);
ps = &em_table->state[i];
--
2.43.0
Hello Dietmar,
On 3/16/2026 5:06 AM, Dietmar Eggemann wrote:
> We need to adapt em_cpu_energy() in include/linux/energy_model.h as
> well.
I clearly didn't test the EAS bits! I'll make sure to spin something up
with QEMU next time to hit these paths.
>
> ---8<---
>
> From 74f9067751b02f4bd0934ba6d47f2a204c763abe Mon Sep 17 00:00:00 2001
> From: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Date: Sun, 15 Mar 2026 23:45:39 +0100
> Subject: [PATCH] PM: EM: Switch to rcu_dereference_all() in wakeup path
>
> em_cpu_energy() is part of the EAS (Fair) task wakeup path. Now that
> rcu_read_{,un}lock() have been removed from find_energy_efficient_cpu()
> switch to rcu_dereference_all() and check for rcu_read_lock_any_held()
> in em_cpu_energy() as well.
> In EAS (Fair) task wakeup path is a preempt/IRQ disabled region, so
> rcu_read_{,un}lock() can be removed.
>
> Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Thank you for catching my mistake and for the fix! Feel free to
include:
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
> ---
> include/linux/energy_model.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
> index e7497f804644..c909a8ba22e8 100644
> --- a/include/linux/energy_model.h
> +++ b/include/linux/energy_model.h
> @@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
> struct em_perf_state *ps;
> int i;
>
> - WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
> + lockdep_assert(rcu_read_lock_any_held());
>
> if (!sum_util)
> return 0;
> @@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
> * Find the lowest performance state of the Energy Model above the
> * requested performance.
> */
> - em_table = rcu_dereference(pd->em_table);
> + em_table = rcu_dereference_all(pd->em_table);
> i = em_pd_get_efficient_state(em_table->state, pd, max_util);
> ps = &em_table->state[i];
>
--
Thanks and Regards,
Prateek
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 8ca12326f592f7554acf2788ecb1c5c954dcf31c
Gitweb: https://git.kernel.org/tip/8ca12326f592f7554acf2788ecb1c5c954dcf31c
Author: Dietmar Eggemann <dietmar.eggemann@arm.com>
AuthorDate: Mon, 16 Mar 2026 00:36:22 +01:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 18 Mar 2026 09:06:49 +01:00
PM: EM: Switch to rcu_dereference_all() in wakeup path
em_cpu_energy() is part of the EAS (Fair) task wakeup path. Now that
rcu_read_{,un}lock() have been removed from find_energy_efficient_cpu()
switch to rcu_dereference_all() and check for rcu_read_lock_any_held()
in em_cpu_energy() as well.
The EAS (Fair) task wakeup path is a preempt/IRQ-disabled region, so
rcu_read_{,un}lock() can be removed.
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://patch.msgid.link/5b1228b7-5949-4a45-9f62-e8ce936de694@arm.com
---
include/linux/energy_model.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index e7497f8..c909a8b 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
struct em_perf_state *ps;
int i;
- WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
+ lockdep_assert(rcu_read_lock_any_held());
if (!sum_util)
return 0;
@@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* Find the lowest performance state of the Energy Model above the
* requested performance.
*/
- em_table = rcu_dereference(pd->em_table);
+ em_table = rcu_dereference_all(pd->em_table);
i = em_pd_get_efficient_state(em_table->state, pd, max_util);
ps = &em_table->state[i];
The following commit has been merged into the sched/core branch of tip:
Commit-ID: fa6874dfeee06352ce7c4c271be6a25d84a38b54
Gitweb: https://git.kernel.org/tip/fa6874dfeee06352ce7c4c271be6a25d84a38b54
Author: K Prateek Nayak <kprateek.nayak@amd.com>
AuthorDate: Thu, 12 Mar 2026 04:44:32
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 18 Mar 2026 09:06:50 +01:00
sched/fair: Remove superfluous rcu_read_lock() in the wakeup path
select_task_rq_fair() is always called with p->pi_lock held and IRQs
disabled which makes it the equivalent of an RCU read-side critical section.
Since commit 71fedc41c23b ("sched/fair: Switch to
rcu_dereference_all()") switched to using rcu_dereference_all() in the
wakeup path, drop the explicit rcu_read_{lock,unlock}() in the fair
task's wakeup path.
Future plans to reuse select_task_rq_fair() /
find_energy_efficient_cpu() in the fair class' balance callback will do
so with IRQs disabled and will comply with the requirements of
rcu_dereference_all() which makes this safe keeping in mind future
development plans too.
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://patch.msgid.link/20260312044434.1974-8-kprateek.nayak@amd.com
---
kernel/sched/fair.c | 33 ++++++++++++---------------------
1 file changed, 12 insertions(+), 21 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c1e5c82..3e24d3e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8570,10 +8570,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
struct perf_domain *pd;
struct energy_env eenv;
- rcu_read_lock();
pd = rcu_dereference_all(rd->pd);
if (!pd)
- goto unlock;
+ return target;
/*
* Energy-aware wake-up happens on the lowest sched_domain starting
@@ -8583,13 +8582,13 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
sd = sd->parent;
if (!sd)
- goto unlock;
+ return target;
target = prev_cpu;
sync_entity_load_avg(&p->se);
if (!task_util_est(p) && p_util_min == 0)
- goto unlock;
+ return target;
eenv_task_busy_time(&eenv, p, prev_cpu);
@@ -8684,7 +8683,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
prev_cpu);
/* CPU utilization has changed */
if (prev_delta < base_energy)
- goto unlock;
+ return target;
prev_delta -= base_energy;
prev_actual_cap = cpu_actual_cap;
best_delta = min(best_delta, prev_delta);
@@ -8708,7 +8707,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
max_spare_cap_cpu);
/* CPU utilization has changed */
if (cur_delta < base_energy)
- goto unlock;
+ return target;
cur_delta -= base_energy;
/*
@@ -8725,7 +8724,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
best_actual_cap = cpu_actual_cap;
}
}
- rcu_read_unlock();
if ((best_fits > prev_fits) ||
((best_fits > 0) && (best_delta < prev_delta)) ||
@@ -8733,11 +8731,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
target = best_energy_cpu;
return target;
-
-unlock:
- rcu_read_unlock();
-
- return target;
}
/*
@@ -8782,7 +8775,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
}
- rcu_read_lock();
for_each_domain(cpu, tmp) {
/*
* If both 'cpu' and 'prev_cpu' are part of this domain,
@@ -8808,14 +8800,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
break;
}
- if (unlikely(sd)) {
- /* Slow path */
- new_cpu = sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag);
- } else if (wake_flags & WF_TTWU) { /* XXX always ? */
- /* Fast path */
- new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
- }
- rcu_read_unlock();
+ /* Slow path */
+ if (unlikely(sd))
+ return sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag);
+
+ /* Fast path */
+ if (wake_flags & WF_TTWU)
+ return select_idle_sibling(p, prev_cpu, new_cpu);
return new_cpu;
}
© 2016 - 2026 Red Hat, Inc.