sched: Random collection of patches

[PATCH 2/5] sched/fair: Avoid rq->lock bouncing in sched_balance_newidle()

Posted by Peter Zijlstra 2 months, 1 week ago

While poking at this code recently I noted we do a pointless
unlock+lock cycle in sched_balance_newidle(). We drop the rq->lock (so
we can balance) but then instantly grab the same rq->lock again in
sched_balance_update_blocked_averages().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/fair.c |   27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9902,15 +9902,11 @@ static unsigned long task_h_load(struct
 }
 #endif /* !CONFIG_FAIR_GROUP_SCHED */
 
-static void sched_balance_update_blocked_averages(int cpu)
+static void __sched_balance_update_blocked_averages(struct rq *rq)
 {
 	bool decayed = false, done = true;
-	struct rq *rq = cpu_rq(cpu);
-	struct rq_flags rf;
 
-	rq_lock_irqsave(rq, &rf);
 	update_blocked_load_tick(rq);
-	update_rq_clock(rq);
 
 	decayed |= __update_blocked_others(rq, &done);
 	decayed |= __update_blocked_fair(rq, &done);
@@ -9918,7 +9914,15 @@ static void sched_balance_update_blocked
 	update_blocked_load_status(rq, !done);
 	if (decayed)
 		cpufreq_update_util(rq, 0);
-	rq_unlock_irqrestore(rq, &rf);
+}
+
+static void sched_balance_update_blocked_averages(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	guard(rq_lock_irqsave)(rq);
+	update_rq_clock(rq);
+	__sched_balance_update_blocked_averages(rq);
 }
 
 /********** Helpers for sched_balance_find_src_group ************************/
@@ -12865,12 +12869,17 @@ static int sched_balance_newidle(struct
 	}
 	rcu_read_unlock();
 
+	/*
+	 * Include sched_balance_update_blocked_averages() in the cost
+	 * calculation because it can be quite costly -- this ensures we skip
+	 * it when avg_idle gets to be very low.
+	 */
+	t0 = sched_clock_cpu(this_cpu);
+	__sched_balance_update_blocked_averages(this_rq);
+
 	rq_modified_clear(this_rq);
 	raw_spin_rq_unlock(this_rq);
 
-	t0 = sched_clock_cpu(this_cpu);
-	sched_balance_update_blocked_averages(this_cpu);
-
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
 		u64 domain_cost;

Re: [PATCH 2/5] sched/fair: Avoid rq->lock bouncing in sched_balance_newidle()

Posted by Shrikanth Hegde 2 months, 1 week ago


On 11/27/25 9:09 PM, Peter Zijlstra wrote:
> While poking at this code recently I noted we do a pointless
> unlock+lock cycle in sched_balance_newidle(). We drop the rq->lock (so
> we can balance) but then instantly grab the same rq->lock again in
> sched_balance_update_blocked_averages().
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>   kernel/sched/fair.c |   27 ++++++++++++++++++---------
>   1 file changed, 18 insertions(+), 9 deletions(-)
> 
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -9902,15 +9902,11 @@ static unsigned long task_h_load(struct
>   }
>   #endif /* !CONFIG_FAIR_GROUP_SCHED */
>   
> -static void sched_balance_update_blocked_averages(int cpu)
> +static void __sched_balance_update_blocked_averages(struct rq *rq)
>   {
>   	bool decayed = false, done = true;
> -	struct rq *rq = cpu_rq(cpu);
> -	struct rq_flags rf;
>   
> -	rq_lock_irqsave(rq, &rf);
>   	update_blocked_load_tick(rq);
> -	update_rq_clock(rq);
>   
>   	decayed |= __update_blocked_others(rq, &done);
>   	decayed |= __update_blocked_fair(rq, &done);
> @@ -9918,7 +9914,15 @@ static void sched_balance_update_blocked
>   	update_blocked_load_status(rq, !done);
>   	if (decayed)
>   		cpufreq_update_util(rq, 0);
> -	rq_unlock_irqrestore(rq, &rf);
> +}
> +
> +static void sched_balance_update_blocked_averages(int cpu)
> +{
> +	struct rq *rq = cpu_rq(cpu);
> +
> +	guard(rq_lock_irqsave)(rq);
> +	update_rq_clock(rq);
> +	__sched_balance_update_blocked_averages(rq);
>   }
>   
>   /********** Helpers for sched_balance_find_src_group ************************/
> @@ -12865,12 +12869,17 @@ static int sched_balance_newidle(struct
>   	}
>   	rcu_read_unlock();
>   
> +	/*
> +	 * Include sched_balance_update_blocked_averages() in the cost
> +	 * calculation because it can be quite costly -- this ensures we skip
> +	 * it when avg_idle gets to be very low.
> +	 */
> +	t0 = sched_clock_cpu(this_cpu);
> +	__sched_balance_update_blocked_averages(this_rq);
> +

I think update_rq_clock() has already been called earlier on this path, as
early as __schedule(), so the helper does not need to call it again here.
No warnings seen.

Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>

[tip: sched/core] sched/fair: Avoid rq->lock bouncing in sched_balance_newidle()

Posted by tip-bot2 for Peter Zijlstra 1 month, 3 weeks ago

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     45e09225085f70b856b7b9f26a18ea767a7e1563
Gitweb:        https://git.kernel.org/tip/45e09225085f70b856b7b9f26a18ea767a7e1563
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 12 Nov 2025 16:08:23 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Sun, 14 Dec 2025 08:25:02 +01:00

sched/fair: Avoid rq->lock bouncing in sched_balance_newidle()

While poking at this code recently I noted we do a pointless
unlock+lock cycle in sched_balance_newidle(). We drop the rq->lock (so
we can balance) but then instantly grab the same rq->lock again in
sched_balance_update_blocked_averages().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251127154725.532469061@infradead.org
---
 kernel/sched/fair.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa033e4..708ad01 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9905,15 +9905,11 @@ static unsigned long task_h_load(struct task_struct *p)
 }
 #endif /* !CONFIG_FAIR_GROUP_SCHED */
 
-static void sched_balance_update_blocked_averages(int cpu)
+static void __sched_balance_update_blocked_averages(struct rq *rq)
 {
 	bool decayed = false, done = true;
-	struct rq *rq = cpu_rq(cpu);
-	struct rq_flags rf;
 
-	rq_lock_irqsave(rq, &rf);
 	update_blocked_load_tick(rq);
-	update_rq_clock(rq);
 
 	decayed |= __update_blocked_others(rq, &done);
 	decayed |= __update_blocked_fair(rq, &done);
@@ -9921,7 +9917,15 @@ static void sched_balance_update_blocked_averages(int cpu)
 	update_blocked_load_status(rq, !done);
 	if (decayed)
 		cpufreq_update_util(rq, 0);
-	rq_unlock_irqrestore(rq, &rf);
+}
+
+static void sched_balance_update_blocked_averages(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	guard(rq_lock_irqsave)(rq);
+	update_rq_clock(rq);
+	__sched_balance_update_blocked_averages(rq);
 }
 
 /********** Helpers for sched_balance_find_src_group ************************/
@@ -12868,12 +12872,17 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
 	}
 	rcu_read_unlock();
 
+	/*
+	 * Include sched_balance_update_blocked_averages() in the cost
+	 * calculation because it can be quite costly -- this ensures we skip
+	 * it when avg_idle gets to be very low.
+	 */
+	t0 = sched_clock_cpu(this_cpu);
+	__sched_balance_update_blocked_averages(this_rq);
+
 	rq_modified_clear(this_rq);
 	raw_spin_rq_unlock(this_rq);
 
-	t0 = sched_clock_cpu(this_cpu);
-	sched_balance_update_blocked_averages(this_cpu);
-
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
 		u64 domain_cost;

[PATCH 1/5] sched/fair: Fold the sched_avg update
[PATCH 2/5] sched/fair: Avoid rq->lock bouncing in sched_balance_newidle()
[PATCH 3/5] sched: Change rcu_dereference_check_sched_domain() to rcu-sched
[PATCH 4/5] sched: Add assertions to QUEUE_CLASS
[PATCH 5/5] sched: Rework sched_class::wakeup_preempt() and rq_modified_*()