kernel/sched/fair.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-)
When NI_RANDOM is enabled, some sched domains may be skipped. If the
sched_balance_newidle() fails to pull any tasks, there maybe substantial
amount of idle time.
This patch introduces a "half-idle" retry mechanism to harness the
remaining idle time, safely re-evaluating the domains that were skipped
stochastically in the first round. The retry logic is restricted by
following constraints:
1. Half Idle Time Threshold: Retries are triggered only when the
remaining idle time exceeds avg_idle / 2. This ensure that there is enough
idle time to aborb the retry costs.
2. Dynamic Re-try Window: Instead of setting the half_idle as avg_idle / 2,
initializing it with the remain_idle. When remain_idle is abundant, using
avg_idle / 2 would shrink the window and skip most of domains.
3. LLC Only: This ensures that the newly triggered load balance remain
lightweight.
4. Skipped Domains Only: The retry only re-evaluates domains that were
skipped by NI_RANDOM during the first pass, this is tracked by 'try_bits'.
Additionally, because this is a 'retry' path, the weight is maintained at
1 to ensure long-term balancing accuracy.
Performance Evaluation:
Tested with schbench against v7.1-rc3 baseline, each case was tested 10
times and the average was used.
[schbench]
-------------------------------------------------------------------------
| baseline | | compare |
loads |rps(M/s) 99th Wakeup Lat| |rps(M/s) 99th Wakeup Lat|
-------------------------------------------------------------------------
threads=2 |1.03 7.14 | |1.05(+1.94%) 7.08 (-0.84%) |
threads=4 |2.68 7.97 | |2.82(+5.22%) 7.82 (-1.88%) |
threads=8 |3.04 12.91 | |3.23(+6.25%) 12.38 (-4.11%) |
threads=16 |3.06 23.27 | |3.30(+7.84%) 21.90 (-5.89%) |
threads=32 |2.95 46.50 | |3.25(+10.17%) 43.28 (-6.92%) |
threads=64 |2.81 94.92 | |3.14(+11.74%) 88.34 (-6.93%) |
threads=128 |2.71 195.77 | |2.97(+9.59%) 183.07 (-6.49%)|
threads=256 |2.60 403.34 | |2.77(+6.54%) 385.30 (-4.47%)|
-------------------------------------------------------------------------
Signed-off-by: Luo Gengkun <luogengkun2@huawei.com>
---
kernel/sched/fair.c | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3ebec186f982..5d1c73a41c37 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -13185,6 +13185,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
int this_cpu = this_rq->cpu;
int continue_balancing = 1;
u64 t0, t1, curr_cost = 0;
+ u64 idx, half_idle = 0, try_bits = 0;
struct sched_domain *sd;
int pulled_task = 0;
@@ -13240,18 +13241,28 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
rq_modified_begin(this_rq, &fair_sched_class);
raw_spin_rq_unlock(this_rq);
+retry:
+ idx = 0;
for_each_domain(this_cpu, sd) {
- u64 domain_cost;
+ u64 domain_cost, next_cost = curr_cost + sd->max_newidle_lb_cost;
- update_next_balance(sd, &next_balance);
+ if (!half_idle)
+ update_next_balance(sd, &next_balance);
- if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+ if (this_rq->avg_idle < next_cost) {
+ continue_balancing = 0;
break;
+ }
+
+ if (try_bits & (1UL << ++idx) ||
+ (half_idle && (!(sd->flags & SD_SHARE_LLC) || next_cost >= half_idle)))
+ continue;
if (sd->flags & SD_BALANCE_NEWIDLE) {
unsigned int weight = 1;
- if (sched_feat(NI_RANDOM) && sd->newidle_ratio < 1024) {
+ if (sched_feat(NI_RANDOM) && sd->newidle_ratio < 1024 &&
+ !half_idle) {
/*
* Throw a 1k sided dice; and only run
* newidle_balance according to the success
@@ -13266,6 +13277,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
weight = (1024 + weight/2) / weight;
}
+ try_bits |= (1UL << idx);
pulled_task = sched_balance_rq(this_cpu, this_rq,
sd, CPU_NEWLY_IDLE,
&continue_balancing);
@@ -13290,6 +13302,17 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
break;
}
+ if (sched_feat(NI_RANDOM) && !half_idle &&
+ !(pulled_task || !continue_balancing)) {
+ s64 remain_idle = this_rq->avg_idle - curr_cost;
+
+ if (remain_idle > 0 &&
+ remain_idle >= this_rq->avg_idle / 2) {
+ half_idle = remain_idle;
+ goto retry;
+ }
+ }
+
raw_spin_rq_lock(this_rq);
if (curr_cost > this_rq->max_idle_balance_cost)
--
2.34.1
© 2016 - 2026 Red Hat, Inc.