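While poking at this code recently I noted we do a pointless
unlock+lock cycle in sched_balance_newidle(). We drop the rq->lock (so
we can balance) but then instantly grab the same rq->lock again in
sched_balance_update_blocked_averages().
Avoid this by splitting out __sched_balance_update_blocked_averages(),
which expects rq->lock to already be held, and calling it in
sched_balance_newidle() before the lock is dropped.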
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
kernel/sched/fair.c | 27 ++++++++++++++++++---------
1 file changed, 18 insertions(+), 9 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9902,15 +9902,11 @@ static unsigned long task_h_load(struct
}
#endif /* !CONFIG_FAIR_GROUP_SCHED */
-static void sched_balance_update_blocked_averages(int cpu)
+static void __sched_balance_update_blocked_averages(struct rq *rq)
{
bool decayed = false, done = true;
- struct rq *rq = cpu_rq(cpu);
- struct rq_flags rf;
- rq_lock_irqsave(rq, &rf);
update_blocked_load_tick(rq);
- update_rq_clock(rq);
decayed |= __update_blocked_others(rq, &done);
decayed |= __update_blocked_fair(rq, &done);
@@ -9918,7 +9914,15 @@ static void sched_balance_update_blocked
update_blocked_load_status(rq, !done);
if (decayed)
cpufreq_update_util(rq, 0);
- rq_unlock_irqrestore(rq, &rf);
+}
+
+static void sched_balance_update_blocked_averages(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ guard(rq_lock_irqsave)(rq);
+ update_rq_clock(rq);
+ __sched_balance_update_blocked_averages(rq);
}
/********** Helpers for sched_balance_find_src_group ************************/
@@ -12865,12 +12869,17 @@ static int sched_balance_newidle(struct
}
rcu_read_unlock();
+ /*
+ * Include sched_balance_update_blocked_averages() in the cost
+ * calculation because it can be quite costly -- this ensures we skip
+ * it when avg_idle gets to be very low.
+ */
+ t0 = sched_clock_cpu(this_cpu);
+ __sched_balance_update_blocked_averages(this_rq);
+
rq_modified_clear(this_rq);
raw_spin_rq_unlock(this_rq);
- t0 = sched_clock_cpu(this_cpu);
- sched_balance_update_blocked_averages(this_cpu);
-
rcu_read_lock();
for_each_domain(this_cpu, sd) {
u64 domain_cost;
On 11/27/25 9:09 PM, Peter Zijlstra wrote:
> While poking at this code recently I noted we do a pointless
> unlock+lock cycle in sched_balance_newidle(). We drop the rq->lock (so
> we can balance) but then instantly grab the same rq->lock again in
> sched_balance_update_blocked_averages().
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> kernel/sched/fair.c | 27 ++++++++++++++++++---------
> 1 file changed, 18 insertions(+), 9 deletions(-)
>
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -9902,15 +9902,11 @@ static unsigned long task_h_load(struct
> }
> #endif /* !CONFIG_FAIR_GROUP_SCHED */
>
> -static void sched_balance_update_blocked_averages(int cpu)
> +static void __sched_balance_update_blocked_averages(struct rq *rq)
> {
> bool decayed = false, done = true;
> - struct rq *rq = cpu_rq(cpu);
> - struct rq_flags rf;
>
> - rq_lock_irqsave(rq, &rf);
> update_blocked_load_tick(rq);
> - update_rq_clock(rq);
>
> decayed |= __update_blocked_others(rq, &done);
> decayed |= __update_blocked_fair(rq, &done);
> @@ -9918,7 +9914,15 @@ static void sched_balance_update_blocked
> update_blocked_load_status(rq, !done);
> if (decayed)
> cpufreq_update_util(rq, 0);
> - rq_unlock_irqrestore(rq, &rf);
> +}
> +
> +static void sched_balance_update_blocked_averages(int cpu)
> +{
> + struct rq *rq = cpu_rq(cpu);
> +
> + guard(rq_lock_irqsave)(rq);
> + update_rq_clock(rq);
> + __sched_balance_update_blocked_averages(rq);
> }
>
> /********** Helpers for sched_balance_find_src_group ************************/
> @@ -12865,12 +12869,17 @@ static int sched_balance_newidle(struct
> }
> rcu_read_unlock();
>
> + /*
> + * Include sched_balance_update_blocked_averages() in the cost
> + * calculation because it can be quite costly -- this ensures we skip
> + * it when avg_idle gets to be very low.
> + */
> + t0 = sched_clock_cpu(this_cpu);
> + __sched_balance_update_blocked_averages(this_rq);
> +
I think we already do update_rq_clock() earlier, as early as __schedule().
No warnings seen.
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
© 2016 - 2025 Red Hat, Inc.