[PATCH v4 7/9] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path

K Prateek Nayak posted 9 patches 3 weeks, 5 days ago
[PATCH v4 7/9] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path
Posted by K Prateek Nayak 3 weeks, 5 days ago
select_task_rq_fair() is always called with p->pi_lock held and IRQs
disabled, which makes it equivalent to an RCU read-side critical section.

Since commit 71fedc41c23b ("sched/fair: Switch to
rcu_dereference_all()") switched to using rcu_dereference_all() in the
wakeup path, drop the explicit rcu_read_{lock,unlock}() in the fair
task's wakeup path.

Future plans to reuse select_task_rq_fair() /
find_energy_efficient_cpu() in the fair class' balance callback will do
so with IRQs disabled and will comply with the requirements of
rcu_dereference_all() which makes this safe keeping in mind future
development plans too.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
Changelog v3..v4:

o No changes.
---
 kernel/sched/fair.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d57c02e82f3a..28853c0abb83 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8570,10 +8570,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	struct perf_domain *pd;
 	struct energy_env eenv;
 
-	rcu_read_lock();
 	pd = rcu_dereference_all(rd->pd);
 	if (!pd)
-		goto unlock;
+		return target;
 
 	/*
 	 * Energy-aware wake-up happens on the lowest sched_domain starting
@@ -8583,13 +8582,13 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
 		sd = sd->parent;
 	if (!sd)
-		goto unlock;
+		return target;
 
 	target = prev_cpu;
 
 	sync_entity_load_avg(&p->se);
 	if (!task_util_est(p) && p_util_min == 0)
-		goto unlock;
+		return target;
 
 	eenv_task_busy_time(&eenv, p, prev_cpu);
 
@@ -8684,7 +8683,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 						    prev_cpu);
 			/* CPU utilization has changed */
 			if (prev_delta < base_energy)
-				goto unlock;
+				return target;
 			prev_delta -= base_energy;
 			prev_actual_cap = cpu_actual_cap;
 			best_delta = min(best_delta, prev_delta);
@@ -8708,7 +8707,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 						   max_spare_cap_cpu);
 			/* CPU utilization has changed */
 			if (cur_delta < base_energy)
-				goto unlock;
+				return target;
 			cur_delta -= base_energy;
 
 			/*
@@ -8725,7 +8724,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 			best_actual_cap = cpu_actual_cap;
 		}
 	}
-	rcu_read_unlock();
 
 	if ((best_fits > prev_fits) ||
 	    ((best_fits > 0) && (best_delta < prev_delta)) ||
@@ -8733,11 +8731,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		target = best_energy_cpu;
 
 	return target;
-
-unlock:
-	rcu_read_unlock();
-
-	return target;
 }
 
 /*
@@ -8782,7 +8775,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 		want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
 	}
 
-	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		/*
 		 * If both 'cpu' and 'prev_cpu' are part of this domain,
@@ -8808,14 +8800,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 			break;
 	}
 
-	if (unlikely(sd)) {
-		/* Slow path */
-		new_cpu = sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag);
-	} else if (wake_flags & WF_TTWU) { /* XXX always ? */
-		/* Fast path */
-		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
-	}
-	rcu_read_unlock();
+	/* Slow path */
+	if (unlikely(sd))
+		return sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag);
+
+	/* Fast path */
+	if (wake_flags & WF_TTWU)
+		return select_idle_sibling(p, prev_cpu, new_cpu);
 
 	return new_cpu;
 }
-- 
2.34.1
Re: [PATCH v4 7/9] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path
Posted by Dietmar Eggemann 3 weeks, 1 day ago
On 12.03.26 05:44, K Prateek Nayak wrote:
> select_task_rq_fair() is always called with p->pi_lock held and IRQs
> disabled, which makes it equivalent to an RCU read-side critical section.
> 
> Since commit 71fedc41c23b ("sched/fair: Switch to
> rcu_dereference_all()") switched to using rcu_dereference_all() in the
> wakeup path, drop the explicit rcu_read_{lock,unlock}() in the fair
> task's wakeup path.
> 
> Future plans to reuse select_task_rq_fair() /
> find_energy_efficient_cpu() in the fair class' balance callback will do
> so with IRQs disabled and will comply with the requirements of
> rcu_dereference_all() which makes this safe keeping in mind future
> development plans too.
> 
> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
> ---
> Changelog v3..v4:
> 
> o No changes.
> ---
>  kernel/sched/fair.c | 33 ++++++++++++---------------------
>  1 file changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index d57c02e82f3a..28853c0abb83 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8570,10 +8570,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>  	struct perf_domain *pd;
>  	struct energy_env eenv;
>  
> -	rcu_read_lock();
>  	pd = rcu_dereference_all(rd->pd);
Got an RCU related warning when running EAS:

[    3.795872] EM: rcu read lock needed
[    3.795903] WARNING: ./include/linux/energy_model.h:251 at compute_energy+0x3a4/0x3bc, CPU#1: swapper/1/0
[    3.813755] Modules linked in:
[    3.816844] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 7.0.0-rc2-00295-g93189edc73c8-dirty #30 PREEMPT
[    3.826807] Hardware name: ARM Juno development board (r0) (DT)
[    3.832752] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    3.839750] pc : compute_energy+0x3a4/0x3bc
[    3.843970] lr : compute_energy+0x3a4/0x3bc
[    3.848185] sp : ffff8000838f39e0
[    3.851518] x29: ffff8000838f3a00 x28: ffff0008008fe560 x27: 00000000000000f9
[    3.858717] x26: ffff800082702250 x25: ffff000803211e00 x24: ffff8000838f3af8
[    3.865913] x23: ffff0008009a3580 x22: 00000000000000f9 x21: 00000000ffffffff
[    3.873108] x20: ffff0008030377c0 x19: 00000000000000b7 x18: 0000000000000028
[    3.880303] x17: ffff8008fc91f000 x16: ffff8000838f0000 x15: 0000000000000007
[    3.887497] x14: fffffffffffc73af x13: 0a64656465656e20 x12: 0000000000008000
[    3.894690] x11: ffff80008272ca18 x10: ffff8000825f7000 x9 : 0000000000000050
[    3.901884] x8 : 0000000000000008 x7 : ffff800080190100 x6 : 0000000000000001
[    3.909076] x5 : 0000000000000161 x4 : 0000000000000000 x3 : 0000000000000001
[    3.916268] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0008002e1ac0
[    3.923461] Call trace:
[    3.925924]  compute_energy+0x3a4/0x3bc (P)
[    3.930147]  select_task_rq_fair+0x590/0x1990
[    3.934541]  try_to_wake_up+0x1f8/0xac4
[    3.938420]  wake_up_process+0x18/0x24
[    3.942208]  process_timeout+0x14/0x20
[    3.945995]  call_timer_fn+0xb8/0x470

We need to adapt em_cpu_energy() in include/linux/energy_model.h as
well.

---8<---

From 74f9067751b02f4bd0934ba6d47f2a204c763abe Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Sun, 15 Mar 2026 23:45:39 +0100
Subject: [PATCH] PM: EM: Switch to rcu_dereference_all() in wakeup path

em_cpu_energy() is part of the EAS (Fair) task wakeup path. Now that
rcu_read_{,un}lock() have been removed from find_energy_efficient_cpu()
switch to rcu_dereference_all() and check for rcu_read_lock_any_held()
in em_cpu_energy() as well.
The EAS (Fair) task wakeup path is a preempt/IRQ-disabled region, so
rcu_read_{,un}lock() can be removed.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
---
 include/linux/energy_model.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index e7497f804644..c909a8ba22e8 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	struct em_perf_state *ps;
 	int i;
 
-	WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
+	lockdep_assert(rcu_read_lock_any_held());
 
 	if (!sum_util)
 		return 0;
@@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * Find the lowest performance state of the Energy Model above the
 	 * requested performance.
 	 */
-	em_table = rcu_dereference(pd->em_table);
+	em_table = rcu_dereference_all(pd->em_table);
 	i = em_pd_get_efficient_state(em_table->state, pd, max_util);
 	ps = &em_table->state[i];
 
-- 
2.43.0
Re: [PATCH v4 7/9] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path
Posted by K Prateek Nayak 3 weeks, 1 day ago
Hello Dietmar,

On 3/16/2026 5:06 AM, Dietmar Eggemann wrote:
> We need to adapt em_cpu_energy() in include/linux/energy_model.h as
> well.

I clearly didn't test the EAS bits! I'll make sure to spin something up
with QEMU next time to hit these paths.

> 
> ---8<---
> 
> From 74f9067751b02f4bd0934ba6d47f2a204c763abe Mon Sep 17 00:00:00 2001
> From: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Date: Sun, 15 Mar 2026 23:45:39 +0100
> Subject: [PATCH] PM: EM: Switch to rcu_dereference_all() in wakeup path
> 
> em_cpu_energy() is part of the EAS (Fair) task wakeup path. Now that
> rcu_read_{,un}lock() have been removed from find_energy_efficient_cpu()
> switch to rcu_dereference_all() and check for rcu_read_lock_any_held()
> in em_cpu_energy() as well.
> The EAS (Fair) task wakeup path is a preempt/IRQ-disabled region, so
> rcu_read_{,un}lock() can be removed.
> 
> Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>

Thank you for catching my mistake and for the fix! Feel free to
include:

Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>

> ---
>  include/linux/energy_model.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
> index e7497f804644..c909a8ba22e8 100644
> --- a/include/linux/energy_model.h
> +++ b/include/linux/energy_model.h
> @@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
>  	struct em_perf_state *ps;
>  	int i;
>  
> -	WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
> +	lockdep_assert(rcu_read_lock_any_held());
>  
>  	if (!sum_util)
>  		return 0;
> @@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
>  	 * Find the lowest performance state of the Energy Model above the
>  	 * requested performance.
>  	 */
> -	em_table = rcu_dereference(pd->em_table);
> +	em_table = rcu_dereference_all(pd->em_table);
>  	i = em_pd_get_efficient_state(em_table->state, pd, max_util);
>  	ps = &em_table->state[i];
>  

-- 
Thanks and Regards,
Prateek
[tip: sched/core] PM: EM: Switch to rcu_dereference_all() in wakeup path
Posted by tip-bot2 for Dietmar Eggemann 2 weeks, 6 days ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     8ca12326f592f7554acf2788ecb1c5c954dcf31c
Gitweb:        https://git.kernel.org/tip/8ca12326f592f7554acf2788ecb1c5c954dcf31c
Author:        Dietmar Eggemann <dietmar.eggemann@arm.com>
AuthorDate:    Mon, 16 Mar 2026 00:36:22 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 18 Mar 2026 09:06:49 +01:00

PM: EM: Switch to rcu_dereference_all() in wakeup path

em_cpu_energy() is part of the EAS (Fair) task wakeup path. Now that
rcu_read_{,un}lock() have been removed from find_energy_efficient_cpu()
switch to rcu_dereference_all() and check for rcu_read_lock_any_held()
in em_cpu_energy() as well.
The EAS (Fair) task wakeup path is a preempt/IRQ-disabled region, so
rcu_read_{,un}lock() can be removed.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://patch.msgid.link/5b1228b7-5949-4a45-9f62-e8ce936de694@arm.com
---
 include/linux/energy_model.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index e7497f8..c909a8b 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	struct em_perf_state *ps;
 	int i;
 
-	WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
+	lockdep_assert(rcu_read_lock_any_held());
 
 	if (!sum_util)
 		return 0;
@@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * Find the lowest performance state of the Energy Model above the
 	 * requested performance.
 	 */
-	em_table = rcu_dereference(pd->em_table);
+	em_table = rcu_dereference_all(pd->em_table);
 	i = em_pd_get_efficient_state(em_table->state, pd, max_util);
 	ps = &em_table->state[i];
 
[tip: sched/core] sched/fair: Remove superfluous rcu_read_lock() in the wakeup path
Posted by tip-bot2 for K Prateek Nayak 2 weeks, 6 days ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     fa6874dfeee06352ce7c4c271be6a25d84a38b54
Gitweb:        https://git.kernel.org/tip/fa6874dfeee06352ce7c4c271be6a25d84a38b54
Author:        K Prateek Nayak <kprateek.nayak@amd.com>
AuthorDate:    Thu, 12 Mar 2026 04:44:32 
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 18 Mar 2026 09:06:50 +01:00

sched/fair: Remove superfluous rcu_read_lock() in the wakeup path

select_task_rq_fair() is always called with p->pi_lock held and IRQs
disabled, which makes it equivalent to an RCU read-side critical section.

Since commit 71fedc41c23b ("sched/fair: Switch to
rcu_dereference_all()") switched to using rcu_dereference_all() in the
wakeup path, drop the explicit rcu_read_{lock,unlock}() in the fair
task's wakeup path.

Future plans to reuse select_task_rq_fair() /
find_energy_efficient_cpu() in the fair class' balance callback will do
so with IRQs disabled and will comply with the requirements of
rcu_dereference_all() which makes this safe keeping in mind future
development plans too.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://patch.msgid.link/20260312044434.1974-8-kprateek.nayak@amd.com
---
 kernel/sched/fair.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c1e5c82..3e24d3e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8570,10 +8570,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	struct perf_domain *pd;
 	struct energy_env eenv;
 
-	rcu_read_lock();
 	pd = rcu_dereference_all(rd->pd);
 	if (!pd)
-		goto unlock;
+		return target;
 
 	/*
 	 * Energy-aware wake-up happens on the lowest sched_domain starting
@@ -8583,13 +8582,13 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
 		sd = sd->parent;
 	if (!sd)
-		goto unlock;
+		return target;
 
 	target = prev_cpu;
 
 	sync_entity_load_avg(&p->se);
 	if (!task_util_est(p) && p_util_min == 0)
-		goto unlock;
+		return target;
 
 	eenv_task_busy_time(&eenv, p, prev_cpu);
 
@@ -8684,7 +8683,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 						    prev_cpu);
 			/* CPU utilization has changed */
 			if (prev_delta < base_energy)
-				goto unlock;
+				return target;
 			prev_delta -= base_energy;
 			prev_actual_cap = cpu_actual_cap;
 			best_delta = min(best_delta, prev_delta);
@@ -8708,7 +8707,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 						   max_spare_cap_cpu);
 			/* CPU utilization has changed */
 			if (cur_delta < base_energy)
-				goto unlock;
+				return target;
 			cur_delta -= base_energy;
 
 			/*
@@ -8725,7 +8724,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 			best_actual_cap = cpu_actual_cap;
 		}
 	}
-	rcu_read_unlock();
 
 	if ((best_fits > prev_fits) ||
 	    ((best_fits > 0) && (best_delta < prev_delta)) ||
@@ -8733,11 +8731,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		target = best_energy_cpu;
 
 	return target;
-
-unlock:
-	rcu_read_unlock();
-
-	return target;
 }
 
 /*
@@ -8782,7 +8775,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 		want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
 	}
 
-	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		/*
 		 * If both 'cpu' and 'prev_cpu' are part of this domain,
@@ -8808,14 +8800,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 			break;
 	}
 
-	if (unlikely(sd)) {
-		/* Slow path */
-		new_cpu = sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag);
-	} else if (wake_flags & WF_TTWU) { /* XXX always ? */
-		/* Fast path */
-		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
-	}
-	rcu_read_unlock();
+	/* Slow path */
+	if (unlikely(sd))
+		return sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag);
+
+	/* Fast path */
+	if (wake_flags & WF_TTWU)
+		return select_idle_sibling(p, prev_cpu, new_cpu);
 
 	return new_cpu;
 }