[PATCH v8] sched: update rq->avg_idle when a task is moved to an idle CPU

Shubhang Kaushik posted 1 patch 2 weeks, 3 days ago
kernel/sched/core.c  | 24 ++++++++++++------------
kernel/sched/idle.c  |  1 +
kernel/sched/sched.h |  1 +
3 files changed, 14 insertions(+), 12 deletions(-)
[PATCH v8] sched: update rq->avg_idle when a task is moved to an idle CPU
Posted by Shubhang Kaushik 2 weeks, 3 days ago
Currently, rq->idle_stamp is only used to calculate avg_idle during
wakeups. This means other paths that move a task to an idle CPU such as
fork/clone, execve, or migrations, do not end the CPU's idle status in
the scheduler's eyes, leading to an inaccurate avg_idle.

This patch introduces update_rq_avg_idle() to provide a more accurate
measurement of CPU idle duration. By invoking this helper in
put_prev_task_idle(), we ensure avg_idle is updated whenever a CPU
stops being idle, regardless of how the new task arrived.

Changes in v8:
- Removed the 'if (rq->idle_stamp)' check: Based on reviewer feedback,
  tracking any idle duration (not just fair-class specific) provides a
  more universal view of core availability.

Testing on an 80-core Ampere Altra (ARMv8) with 6.19-rc5 baseline:
- Hackbench: +7.2% performance gain at 16 threads.
- Schbench: Reduced p99.9 tail latencies at high concurrency.

Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Signed-off-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
---
This patch improves the accuracy of rq->avg_idle by ensuring the CPU's idle
duration is updated whenever a task moves to an idle CPU.

Currently, rq->idle_stamp is only cleared during wakeups. This leaves other
paths that move a task to an idle CPU, such as fork, exec, or load-balancing
migrations, unable to end the CPU's idle status in the scheduler's view.
This gap produces stale avg_idle values, misleading the newidle balancer
into incorrectly skipping task migrations and degrading overall throughput
on high-core-count systems.

v7--> v8:
    Remove the 'if (rq->idle_stamp)' condition check in
    update_rq_avg_idle().
    --v7:https://lkml.org/lkml/2025/12/26/90

v6--> v7:
    Call update_rq_avg_idle() in put_prev_task_idle().
    Remove patch 1 from the original patch set.
   --v6:https://lkml.org/lkml/2025/12/9/377

v5--> v6:
    Remove "this_rq->idle_stamp = 0;" in patch 1.
    Update the test result with Specjbb.
   --v5:https://lkml.org/lkml/2025/12/3/179

v4--> v5:
    Modify the changelog.

   --v4:https://lkml.org/lkml/2025/11/28/300

v3--> v4:
     Remove the code for delayed task.

   --v3: https://lkml.org/lkml/2025/11/27/456

v2--> v3:
  -- merge patch 3 into patch 2:
      move update_rq_avg_idle() to enqueue_task().

   --v2: https://lkml.org/lkml/2025/11/27/214

v1--> v2:
  -- Move update_rq_avg_idle() into activate_task()
  -- Add a delayed-dequeue task check.

   --v1: https://lkml.org/lkml/2025/11/24/97

kernel/sched/core.c | 24 ++++++++++++------------
kernel/sched/idle.c | 1 +
kernel/sched/sched.h | 1 +
3 files changed, 14 insertions(+), 12 deletions(-)
--
2.52.0

sched/core: update rq->avg_idle when a task is moved to an idle CPU
---
 kernel/sched/core.c  | 24 ++++++++++++------------
 kernel/sched/idle.c  |  1 +
 kernel/sched/sched.h |  1 +
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 045f83ad261e25283d290fd064ad47cd2399dc79..81a841e22c961ff04ad291eeeed81147fd464324 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3607,6 +3607,18 @@ static inline void ttwu_do_wakeup(struct task_struct *p)
 	trace_sched_wakeup(p);
 }
 
+void update_rq_avg_idle(struct rq *rq)
+{
+	u64 delta = rq_clock(rq) - rq->idle_stamp;
+	u64 max = 2*rq->max_idle_balance_cost;
+
+	update_avg(&rq->avg_idle, delta);
+
+	if (rq->avg_idle > max)
+		rq->avg_idle = max;
+	rq->idle_stamp = 0;
+}
+
 static void
 ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 		 struct rq_flags *rf)
@@ -3642,18 +3654,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 		p->sched_class->task_woken(rq, p);
 		rq_repin_lock(rq, rf);
 	}
-
-	if (rq->idle_stamp) {
-		u64 delta = rq_clock(rq) - rq->idle_stamp;
-		u64 max = 2*rq->max_idle_balance_cost;
-
-		update_avg(&rq->avg_idle, delta);
-
-		if (rq->avg_idle > max)
-			rq->avg_idle = max;
-
-		rq->idle_stamp = 0;
-	}
 }
 
 /*
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c174afe1dd177a22535417be0de1fc1b690c0368..36ddc5bcfa0383bd4d07d3c8b732ee5b8567d194 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -460,6 +460,7 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct t
 {
 	update_curr_idle(rq);
 	scx_update_idle(rq, false, true);
+	update_rq_avg_idle(rq);
 }
 
 static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 93fce4bbff5eac1d4719394e89dfae886b74d865..7edf8600f2c3f45afa32bc73db2155ea6e0067f0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1676,6 +1676,7 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 
 #endif /* !CONFIG_FAIR_GROUP_SCHED */
 
+extern void update_rq_avg_idle(struct rq *rq);
 extern void update_rq_clock(struct rq *rq);
 
 /*

---
base-commit: 24d479d26b25bce5faea3ddd9fa8f3a6c3129ea7
change-id: 20260116-v8-patch-series-5ff91b821cd4

Best regards,
-- 
Shubhang Kaushik <shubhang@os.amperecomputing.com>
Re: [PATCH v8] sched: update rq->avg_idle when a task is moved to an idle CPU
Posted by Huang Shijie 2 weeks, 1 day ago
On Wed, Jan 21, 2026 at 01:31:53AM -0800, Shubhang Kaushik wrote:
> Currently, rq->idle_stamp is only used to calculate avg_idle during
> wakeups. This means other paths that move a task to an idle CPU such as
> fork/clone, execve, or migrations, do not end the CPU's idle status in
> the scheduler's eyes, leading to an inaccurate avg_idle.
> 
> This patch introduces update_rq_avg_idle() to provide a more accurate
> measurement of CPU idle duration. By invoking this helper in
> put_prev_task_idle(), we ensure avg_idle is updated whenever a CPU
> stops being idle, regardless of how the new task arrived.
> 
> Changes in v8:
> - Removed the 'if (rq->idle_stamp)' check: Based on reviewer feedback,
>   tracking any idle duration (not just fair-class specific) provides a
>   more universal view of core availability.
> 
> Testing on an 80-core Ampere Altra (ARMv8) with 6.19-rc5 baseline:
> - Hackbench : +7.2% performance gain at 16 threads.
> - Schbench: Reduced p99.9 tail latencies at high concurrency.
I am very glad to see this result. :)


Thanks
Huang Shijie
Re: [PATCH v8] sched: update rq->avg_idle when a task is moved to an idle CPU
Posted by K Prateek Nayak 2 weeks, 2 days ago
Hello Shubhang,

On 1/21/2026 3:01 PM, Shubhang Kaushik wrote:
> Currently, rq->idle_stamp is only used to calculate avg_idle during
> wakeups. This means other paths that move a task to an idle CPU such as
> fork/clone, execve, or migrations, do not end the CPU's idle status in
> the scheduler's eyes, leading to an inaccurate avg_idle.
> 
> This patch introduces update_rq_avg_idle() to provide a more accurate
> measurement of CPU idle duration. By invoking this helper in
> put_prev_task_idle(), we ensure avg_idle is updated whenever a CPU
> stops being idle, regardless of how the new task arrived.
> 
> Changes in v8:
> - Removed the 'if (rq->idle_stamp)' check: Based on reviewer feedback,
>   tracking any idle duration (not just fair-class specific) provides a
>   more universal view of core availability.
> 
> Testing on an 80-core Ampere Altra (ARMv8) with 6.19-rc5 baseline:
> - Hackbench : +7.2% performance gain at 16 threads.
> - Schbench: Reduced p99.9 tail latencies at high concurrency.
> 
> Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
> Signed-off-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>

For the most part I haven't observed any regressions. The one that shows up
most consistently is with tbench at 256 clients, which seems to be extremely
sensitive to any newidle balance changes on my setup:

  =====================================
  Test          : tbench
  Units         : Normalized throughput
  Interpretation: Higher is better
  Statistic     : %diff in AMean
  =====================================

    Clients     %diff
          1       -3%
          2       -1%
          4        0%
          8        0%
         16        0%
         32       -1%
         64       -2%
        128       -1%
        256       -5% *
        512        0%
       1024        0%

Note: During reruns with profiling, I've seen the results on tip being
closer to the patched kernel (~2% - within the margin of error).

The schedstats show a lot more idle and newidle balance attempts
within the MC domain (MC, SMT) in the bad case:

  ----------------------------------------------------------------------------------------------------
  CPU: <ALL CPUS SUMMARY> | DOMAIN: SMT
  ----------------------------------------------------------------------------------------------------
  DESC                                                                    COUNT1      COUNT2   PCT_CHANGE     AVG_JIFFIES1 AVG_JIFFIES2
  ----------------------------------------- <Category idle> ------------------------------------------
  idle_lb_count                                                    :         721,       1080  |    49.79% |  $       28.30,       18.89 $
  idle_lb_balanced                                                 :         638,        835  |    30.88% |  $       31.98,       24.43 $
  idle_lb_failed                                                   :          60,        173  |   188.33% |  $      340.03,      117.90 $
  ...
  *idle_lb_success_count                                           :          23,         72  |   213.04% |
  *idle_lb_avg_pulled                                              :        1.13,       1.08  |    -4.17% |
  ---------------------------------------- <Category newidle> ----------------------------------------
  newidle_lb_count                                                 :        3964,      17961  |   353.10% |  $        5.15,        1.14 $
  newidle_lb_balanced                                              :        3235,      13723  |   324.20% |  $        6.31,        1.49 $
  newidle_lb_failed                                                :         540,       3227  |   497.59% |  $       37.78,        6.32 $
  ...
  *newidle_lb_success_count                                        :         189,       1011  |   434.92% |
  *newidle_lb_avg_pulled                                           :        0.99,       1.00  |     0.43% |
  --------------------------------- <Category active_load_balance()> ---------------------------------
  
  ----------------------------------------------------------------------------------------------------
  CPU: <ALL CPUS SUMMARY> | DOMAIN: MC
  ----------------------------------------------------------------------------------------------------
  DESC                                                                    COUNT1      COUNT2   PCT_CHANGE     AVG_JIFFIES1 AVG_JIFFIES2
  ----------------------------------------- <Category idle> ------------------------------------------
  idle_lb_count                                                    :         301,        527  |    75.08% |  $       67.78,       38.70 $
  idle_lb_balanced                                                 :          97,        128  |    31.96% |  $      210.33,      159.34 $
  idle_lb_failed                                                   :         179,        354  |    97.77% |  $      113.98,       57.62 $
  ...
  *idle_lb_success_count                                           :          25,         45  |    80.00% |
  *idle_lb_avg_pulled                                              :        1.52,       1.40  |    -7.89% |
  ---------------------------------------- <Category newidle> ----------------------------------------
  newidle_lb_count                                                 :        1917,       7022  |   266.30% |  $       10.64,        2.90 $
  newidle_lb_balanced                                              :         380,        793  |   108.68% |  $       53.69,       25.72 $
  newidle_lb_failed                                                :        1481,       6011  |   305.87% |  $       13.78,        3.39 $
  ...
  *newidle_lb_success_count                                        :          56,        218  |   289.29% |
  *newidle_lb_avg_pulled                                           :        0.98,       1.00  |     1.35% |
  --------------------------------- <Category active_load_balance()> ---------------------------------

  (Full schedstats diff attached below)

For PKG and above, the difference isn't large. The success count also
increases in proportion to the attempts, but it seems that the additional
attempts themselves aren't sitting too well with this particular
benchmark.

tbench has these super short sleep durations and benefits from running
client and server on the same LLC domain. schbench latencies for similar
configs don't show any difference so I wouldn't worry too much about this
specific regression.

Feel free to include:

Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>

-- 
Thanks and Regards,
Prateek
Description
----------------------------------------------------------------------------------------------------
DESC                          -> Description of the field
COUNT                         -> Value of the field
PCT_CHANGE                    -> Percent change with corresponding base value
AVG_JIFFIES                   -> Avg time in jiffies between two consecutive occurrence of event
----------------------------------------------------------------------------------------------------

Time elapsed (in jiffies)                                        :       20402,      20396
----------------------------------------------------------------------------------------------------
CPU: <ALL CPUS SUMMARY>
----------------------------------------------------------------------------------------------------
DESC                                                                       TIP    AVG_IDLE   PCT_CHANGE    PCT_CHANGE1 PCT_CHANGE2
----------------------------------------------------------------------------------------------------
yld_count                                                        :           0,          0  |     0.00% |
array_exp                                                        :           0,          0  |     0.00% |
sched_count                                                      :     4906662,    4927584  |     0.43% |
sched_goidle                                                     :      252107,     426743  |    69.27% |  (     5.14%,      8.66% )
ttwu_count                                                       :     4643524,    4488589  |    -3.34% |
ttwu_local                                                       :     4383253,    4037104  |    -7.90% |  (    94.39%,     89.94% )
rq_cpu_time                                                      : 69308138825,67737374675  |    -2.27% |
run_delay                                                        : 17304307884,18514782424  |     7.00% |  (    24.97%,     27.33% )
pcount                                                           :     4654499,    4500796  |    -3.30% |
----------------------------------------------------------------------------------------------------
CPU: <ALL CPUS SUMMARY> | DOMAIN: SMT
----------------------------------------------------------------------------------------------------
DESC                                                                       TIP    AVG_IDLE   PCT_CHANGE     AVG_JIFFIES1 AVG_JIFFIES2
----------------------------------------- <Category busy> ------------------------------------------
busy_lb_count                                                    :        1161,       1135  |    -2.24% |  $       17.57,       17.97 $
busy_lb_balanced                                                 :        1057,        999  |    -5.49% |  $       19.30,       20.42 $
busy_lb_failed                                                   :          16,         20  |    25.00% |  $     1275.12,     1019.80 $
busy_lb_imbalance_load                                           :         148,        197  |    33.11% |
busy_lb_imbalance_util                                           :           0,          0  |     0.00% |
busy_lb_imbalance_task                                           :           0,          0  |     0.00% |
busy_lb_imbalance_misfit                                         :           0,          0  |     0.00% |
busy_lb_gained                                                   :          94,        124  |    31.91% |
busy_lb_hot_gained                                               :           0,          0  |     0.00% |
busy_lb_nobusyq                                                  :         157,        118  |   -24.84% |  $      129.95,      172.85 $
busy_lb_nobusyg                                                  :         900,        881  |    -2.11% |  $       22.67,       23.15 $
*busy_lb_success_count                                           :          88,        116  |    31.82% |
*busy_lb_avg_pulled                                              :        1.07,       1.07  |     0.07% |
----------------------------------------- <Category idle> ------------------------------------------
idle_lb_count                                                    :         721,       1080  |    49.79% |  $       28.30,       18.89 $
idle_lb_balanced                                                 :         638,        835  |    30.88% |  $       31.98,       24.43 $
idle_lb_failed                                                   :          60,        173  |   188.33% |  $      340.03,      117.90 $
idle_lb_imbalance_load                                           :           0,          0  |     0.00% |
idle_lb_imbalance_util                                           :           0,          0  |     0.00% |
idle_lb_imbalance_task                                           :          83,        246  |   196.39% |
idle_lb_imbalance_misfit                                         :           0,          0  |     0.00% |
idle_lb_gained                                                   :          26,         78  |   200.00% |
idle_lb_hot_gained                                               :           0,          0  |     0.00% |
idle_lb_nobusyq                                                  :           0,          0  |     0.00% |  $        0.00,        0.00 $
idle_lb_nobusyg                                                  :         638,        834  |    30.72% |  $       31.98,       24.46 $
*idle_lb_success_count                                           :          23,         72  |   213.04% |
*idle_lb_avg_pulled                                              :        1.13,       1.08  |    -4.17% |
---------------------------------------- <Category newidle> ----------------------------------------
newidle_lb_count                                                 :        3964,      17961  |   353.10% |  $        5.15,        1.14 $
newidle_lb_balanced                                              :        3235,      13723  |   324.20% |  $        6.31,        1.49 $
newidle_lb_failed                                                :         540,       3227  |   497.59% |  $       37.78,        6.32 $
newidle_lb_imbalance_load                                        :           0,          0  |     0.00% |
newidle_lb_imbalance_util                                        :           0,          0  |     0.00% |
newidle_lb_imbalance_task                                        :         734,       4245  |   478.34% |
newidle_lb_imbalance_misfit                                      :           0,          0  |     0.00% |
newidle_lb_gained                                                :         188,       1010  |   437.23% |
newidle_lb_hot_gained                                            :           0,          0  |     0.00% |
newidle_lb_nobusyq                                               :           0,          1  |     0.00% |  $        0.00,    20396.00 $
newidle_lb_nobusyg                                               :        3233,      13714  |   324.19% |  $        6.31,        1.49 $
*newidle_lb_success_count                                        :         189,       1011  |   434.92% |
*newidle_lb_avg_pulled                                           :        0.99,       1.00  |     0.43% |
--------------------------------- <Category active_load_balance()> ---------------------------------
alb_count                                                        :           4,          7  |    75.00% |
alb_failed                                                       :           0,          0  |     0.00% |
alb_pushed                                                       :           4,          7  |    75.00% |
--------------------------------- <Category sched_balance_exec()> ----------------------------------
sbe_count                                                        :           0,          0  |     0.00% |
sbe_balanced                                                     :           0,          0  |     0.00% |
sbe_pushed                                                       :           0,          0  |     0.00% |
--------------------------------- <Category sched_balance_fork()> ----------------------------------
sbf_count                                                        :           0,          0  |     0.00% |
sbf_balanced                                                     :           0,          0  |     0.00% |
sbf_pushed                                                       :           0,          0  |     0.00% |
------------------------------------------ <Wakeup Info> -------------------------------------------
ttwu_wake_remote                                                 :      177832,     252120  |    41.77% |
ttwu_move_affine                                                 :      174138,     247729  |    42.26% |
ttwu_move_balance                                                :           0,          0  |     0.00% |
----------------------------------------------------------------------------------------------------
CPU: <ALL CPUS SUMMARY> | DOMAIN: MC
----------------------------------------------------------------------------------------------------
DESC                                                                       TIP    AVG_IDLE   PCT_CHANGE     AVG_JIFFIES1 AVG_JIFFIES2
----------------------------------------- <Category busy> ------------------------------------------
busy_lb_count                                                    :         133,        117  |   -12.03% |  $      153.40,      174.32 $
busy_lb_balanced                                                 :         123,        104  |   -15.45% |  $      165.87,      196.12 $
busy_lb_failed                                                   :           9,         11  |    22.22% |  $     2266.89,     1854.18 $
busy_lb_imbalance_load                                           :          27,         34  |    25.93% |
busy_lb_imbalance_util                                           :           0,          0  |     0.00% |
busy_lb_imbalance_task                                           :           0,          0  |     0.00% |
busy_lb_imbalance_misfit                                         :           0,          0  |     0.00% |
busy_lb_gained                                                   :           1,          2  |   100.00% |
busy_lb_hot_gained                                               :           1,          2  |   100.00% |
busy_lb_nobusyq                                                  :           6,          3  |   -50.00% |  $     3400.33,     6798.67 $
busy_lb_nobusyg                                                  :          52,         44  |   -15.38% |  $      392.35,      463.55 $
*busy_lb_success_count                                           :           1,          2  |   100.00% |
*busy_lb_avg_pulled                                              :        1.00,       1.00  |     0.00% |
----------------------------------------- <Category idle> ------------------------------------------
idle_lb_count                                                    :         301,        527  |    75.08% |  $       67.78,       38.70 $
idle_lb_balanced                                                 :          97,        128  |    31.96% |  $      210.33,      159.34 $
idle_lb_failed                                                   :         179,        354  |    97.77% |  $      113.98,       57.62 $
idle_lb_imbalance_load                                           :           0,          0  |     0.00% |
idle_lb_imbalance_util                                           :           0,          0  |     0.00% |
idle_lb_imbalance_task                                           :         231,        474  |   105.19% |
idle_lb_imbalance_misfit                                         :           0,          0  |     0.00% |
idle_lb_gained                                                   :          38,         63  |    65.79% |
idle_lb_hot_gained                                               :          26,         42  |    61.54% |
idle_lb_nobusyq                                                  :           0,          0  |     0.00% |  $        0.00,        0.00 $
idle_lb_nobusyg                                                  :          51,         61  |    19.61% |  $      400.04,      334.36 $
*idle_lb_success_count                                           :          25,         45  |    80.00% |
*idle_lb_avg_pulled                                              :        1.52,       1.40  |    -7.89% |
---------------------------------------- <Category newidle> ----------------------------------------
newidle_lb_count                                                 :        1917,       7022  |   266.30% |  $       10.64,        2.90 $
newidle_lb_balanced                                              :         380,        793  |   108.68% |  $       53.69,       25.72 $
newidle_lb_failed                                                :        1481,       6011  |   305.87% |  $       13.78,        3.39 $
newidle_lb_imbalance_load                                        :           6,         22  |   266.67% |
newidle_lb_imbalance_util                                        :           0,          0  |     0.00% |
newidle_lb_imbalance_task                                        :        1636,       6823  |   317.05% |
newidle_lb_imbalance_misfit                                      :           0,          0  |     0.00% |
newidle_lb_gained                                                :          55,        217  |   294.55% |
newidle_lb_hot_gained                                            :           8,         33  |   312.50% |
newidle_lb_nobusyq                                               :           5,         14  |   180.00% |  $     4080.40,     1456.86 $
newidle_lb_nobusyg                                               :         373,        777  |   108.31% |  $       54.70,       26.25 $
*newidle_lb_success_count                                        :          56,        218  |   289.29% |
*newidle_lb_avg_pulled                                           :        0.98,       1.00  |     1.35% |
--------------------------------- <Category active_load_balance()> ---------------------------------
alb_count                                                        :          10,         13  |    30.00% |
alb_failed                                                       :           0,          0  |     0.00% |
alb_pushed                                                       :          10,         13  |    30.00% |
--------------------------------- <Category sched_balance_exec()> ----------------------------------
sbe_count                                                        :           0,          0  |     0.00% |
sbe_balanced                                                     :           0,          0  |     0.00% |
sbe_pushed                                                       :           0,          0  |     0.00% |
--------------------------------- <Category sched_balance_fork()> ----------------------------------
sbf_count                                                        :           0,          0  |     0.00% |
sbf_balanced                                                     :           0,          0  |     0.00% |
sbf_pushed                                                       :           0,          0  |     0.00% |
------------------------------------------ <Wakeup Info> -------------------------------------------
ttwu_wake_remote                                                 :       82399,     199321  |   141.90% |
ttwu_move_affine                                                 :       75851,     172178  |   127.00% |
ttwu_move_balance                                                :           0,          0  |     0.00% |
----------------------------------------------------------------------------------------------------
CPU: <ALL CPUS SUMMARY> | DOMAIN: PKG
----------------------------------------------------------------------------------------------------
DESC                                                                       TIP    AVG_IDLE   PCT_CHANGE     AVG_JIFFIES1 AVG_JIFFIES2
----------------------------------------- <Category busy> ------------------------------------------
busy_lb_count                                                    :          10,          8  |   -20.00% |  $     2040.20,     2549.50 $
busy_lb_balanced                                                 :          10,          8  |   -20.00% |  $     2040.20,     2549.50 $
busy_lb_failed                                                   :           0,          0  |     0.00% |  $        0.00,        0.00 $
busy_lb_imbalance_load                                           :           1,          0  |  -100.00% |
busy_lb_imbalance_util                                           :           0,          0  |     0.00% |
busy_lb_imbalance_task                                           :           0,          0  |     0.00% |
busy_lb_imbalance_misfit                                         :           0,          0  |     0.00% |
busy_lb_gained                                                   :           0,          0  |     0.00% |
busy_lb_hot_gained                                               :           0,          0  |     0.00% |
busy_lb_nobusyq                                                  :           0,          0  |     0.00% |  $        0.00,        0.00 $
busy_lb_nobusyg                                                  :           0,          0  |     0.00% |  $        0.00,        0.00 $
*busy_lb_success_count                                           :           0,          0  |     0.00% |
*busy_lb_avg_pulled                                              :        0.00,       0.00  |     0.00% |
----------------------------------------- <Category idle> ------------------------------------------
idle_lb_count                                                    :          55,         93  |    69.09% |  $      370.95,      219.31 $
idle_lb_balanced                                                 :          49,         77  |    57.14% |  $      416.37,      264.88 $
idle_lb_failed                                                   :           6,         14  |   133.33% |  $     3400.33,     1456.86 $
idle_lb_imbalance_load                                           :          52,         56  |     7.69% |
idle_lb_imbalance_util                                           :       13439,      40747  |   203.20% |
idle_lb_imbalance_task                                           :           0,          0  |     0.00% |
idle_lb_imbalance_misfit                                         :           0,          0  |     0.00% |
idle_lb_gained                                                   :           0,          0  |     0.00% |
idle_lb_hot_gained                                               :           0,          0  |     0.00% |
idle_lb_nobusyq                                                  :           0,          1  |     0.00% |  $        0.00,    20396.00 $
idle_lb_nobusyg                                                  :          19,         20  |     5.26% |  $     1073.79,     1019.80 $
*idle_lb_success_count                                           :           0,          2  |     0.00% |
*idle_lb_avg_pulled                                              :        0.00,       0.00  |     0.00% |
---------------------------------------- <Category newidle> ----------------------------------------
newidle_lb_count                                                 :          43,         13  |   -69.77% |  $      474.47,     1568.92 $
newidle_lb_balanced                                              :          28,          8  |   -71.43% |  $      728.64,     2549.50 $
newidle_lb_failed                                                :          14,          5  |   -64.29% |  $     1457.29,     4079.20 $
newidle_lb_imbalance_load                                        :          65,         24  |   -63.08% |
newidle_lb_imbalance_util                                        :       68852,       7111  |   -89.67% |
newidle_lb_imbalance_task                                        :           9,          5  |   -44.44% |
newidle_lb_imbalance_misfit                                      :           0,          0  |     0.00% |
newidle_lb_gained                                                :           0,          0  |     0.00% |
newidle_lb_hot_gained                                            :           0,          0  |     0.00% |
newidle_lb_nobusyq                                               :           0,          0  |     0.00% |  $        0.00,        0.00 $
newidle_lb_nobusyg                                               :          27,          7  |   -74.07% |  $      755.63,     2913.71 $
*newidle_lb_success_count                                        :           1,          0  |  -100.00% |
*newidle_lb_avg_pulled                                           :        0.00,       0.00  |     0.00% |
--------------------------------- <Category active_load_balance()> ---------------------------------
alb_count                                                        :           0,          0  |     0.00% |
alb_failed                                                       :           0,          0  |     0.00% |
alb_pushed                                                       :           0,          0  |     0.00% |
--------------------------------- <Category sched_balance_exec()> ----------------------------------
sbe_count                                                        :           0,          0  |     0.00% |
sbe_balanced                                                     :           0,          0  |     0.00% |
sbe_pushed                                                       :           0,          0  |     0.00% |
--------------------------------- <Category sched_balance_fork()> ----------------------------------
sbf_count                                                        :           0,          0  |     0.00% |
sbf_balanced                                                     :           0,          0  |     0.00% |
sbf_pushed                                                       :           0,          0  |     0.00% |
------------------------------------------ <Wakeup Info> -------------------------------------------
ttwu_wake_remote                                                 :           8,         12  |    50.00% |
ttwu_move_affine                                                 :           1,          1  |     0.00% |
ttwu_move_balance                                                :           0,          0  |     0.00% |
----------------------------------------------------------------------------------------------------
CPU: <ALL CPUS SUMMARY> | DOMAIN: NUMA
----------------------------------------------------------------------------------------------------
DESC                                                                       TIP    AVG_IDLE   PCT_CHANGE     AVG_JIFFIES1 AVG_JIFFIES2
----------------------------------------- <Category busy> ------------------------------------------
busy_lb_count                                                    :           4,          4  |     0.00% |  $     5100.50,     5099.00 $
busy_lb_balanced                                                 :           4,          4  |     0.00% |  $     5100.50,     5099.00 $
busy_lb_failed                                                   :           0,          0  |     0.00% |  $        0.00,        0.00 $
busy_lb_imbalance_load                                           :           0,          0  |     0.00% |
busy_lb_imbalance_util                                           :           0,          0  |     0.00% |
busy_lb_imbalance_task                                           :           0,          0  |     0.00% |
busy_lb_imbalance_misfit                                         :           0,          0  |     0.00% |
busy_lb_gained                                                   :           0,          0  |     0.00% |
busy_lb_hot_gained                                               :           0,          0  |     0.00% |
busy_lb_nobusyq                                                  :           0,          0  |     0.00% |  $        0.00,        0.00 $
busy_lb_nobusyg                                                  :           0,          0  |     0.00% |  $        0.00,        0.00 $
*busy_lb_success_count                                           :           0,          0  |     0.00% |
*busy_lb_avg_pulled                                              :        0.00,       0.00  |     0.00% |
----------------------------------------- <Category idle> ------------------------------------------
idle_lb_count                                                    :          35,         52  |    48.57% |  $      582.91,      392.23 $
idle_lb_balanced                                                 :          34,         51  |    50.00% |  $      600.06,      399.92 $
idle_lb_failed                                                   :           0,          1  |     0.00% |  $        0.00,    20396.00 $
idle_lb_imbalance_load                                           :          58,         64  |    10.34% |
idle_lb_imbalance_util                                           :           0,      12169  |     0.00% |
idle_lb_imbalance_task                                           :           0,          0  |     0.00% |
idle_lb_imbalance_misfit                                         :           0,          0  |     0.00% |
idle_lb_gained                                                   :           0,          0  |     0.00% |
idle_lb_hot_gained                                               :           0,          0  |     0.00% |
idle_lb_nobusyq                                                  :           0,          0  |     0.00% |  $        0.00,        0.00 $
idle_lb_nobusyg                                                  :           1,          0  |  -100.00% |  $    20402.00,        0.00 $
*idle_lb_success_count                                           :           1,          0  |  -100.00% |
*idle_lb_avg_pulled                                              :        0.00,       0.00  |     0.00% |
---------------------------------------- <Category newidle> ----------------------------------------
newidle_lb_count                                                 :          21,          4  |   -80.95% |  $      971.52,     5099.00 $
newidle_lb_balanced                                              :          18,          2  |   -88.89% |  $     1133.44,    10198.00 $
newidle_lb_failed                                                :           2,          1  |   -50.00% |  $    10201.00,    20396.00 $
newidle_lb_imbalance_load                                        :         418,         89  |   -78.71% |
newidle_lb_imbalance_util                                        :        2014,      24728  |  1127.81% |
newidle_lb_imbalance_task                                        :          20,         12  |   -40.00% |
newidle_lb_imbalance_misfit                                      :           0,          0  |     0.00% |
newidle_lb_gained                                                :           0,          0  |     0.00% |
newidle_lb_hot_gained                                            :           0,          0  |     0.00% |
newidle_lb_nobusyq                                               :           0,          0  |     0.00% |  $        0.00,        0.00 $
newidle_lb_nobusyg                                               :          10,          1  |   -90.00% |  $     2040.20,    20396.00 $
*newidle_lb_success_count                                        :           1,          1  |     0.00% |
*newidle_lb_avg_pulled                                           :        0.00,       0.00  |     0.00% |
--------------------------------- <Category active_load_balance()> ---------------------------------
alb_count                                                        :           0,          0  |     0.00% |
alb_failed                                                       :           0,          0  |     0.00% |
alb_pushed                                                       :           0,          0  |     0.00% |
--------------------------------- <Category sched_balance_exec()> ----------------------------------
sbe_count                                                        :           0,          0  |     0.00% |
sbe_balanced                                                     :           0,          0  |     0.00% |
sbe_pushed                                                       :           0,          0  |     0.00% |
--------------------------------- <Category sched_balance_fork()> ----------------------------------
sbf_count                                                        :           0,          0  |     0.00% |
sbf_balanced                                                     :           0,          0  |     0.00% |
sbf_pushed                                                       :           0,          0  |     0.00% |
------------------------------------------ <Wakeup Info> -------------------------------------------
ttwu_wake_remote                                                 :          31,         31  |     0.00% |
ttwu_move_affine                                                 :           2,          2  |     0.00% |
ttwu_move_balance                                                :           0,          0  |     0.00% |
----------------------------------------------------------------------------------------------------
Re: [PATCH v8] sched: update rq->avg_idle when a task is moved to an idle CPU
Posted by Vincent Guittot 2 weeks, 2 days ago
On Wed, 21 Jan 2026 at 10:33, Shubhang Kaushik
<shubhang@os.amperecomputing.com> wrote:
>
> Currently, rq->idle_stamp is only used to calculate avg_idle during
> wakeups. This means other paths that move a task to an idle CPU such as
> fork/clone, execve, or migrations, do not end the CPU's idle status in
> the scheduler's eyes, leading to an inaccurate avg_idle.
>
> This patch introduces update_rq_avg_idle() to provide a more accurate
> measurement of CPU idle duration. By invoking this helper in
> put_prev_task_idle(), we ensure avg_idle is updated whenever a CPU
> stops being idle, regardless of how the new task arrived.
>
> Changes in v8:
> - Removed the 'if (rq->idle_stamp)' check: Based on reviewer feedback,
>   tracking any idle duration (not just fair-class specific) provides a
>   more universal view of core availability.
>
> Testing on an 80-core Ampere Altra (ARMv8) with 6.19-rc5 baseline:
> - Hackbench: +7.2% performance gain at 16 threads.
> - Schbench: Reduced p99.9 tail latencies at high concurrency.
>
> Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
> Signed-off-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>


> ---
> This series improves the accuracy of rq->avg_idle by ensuring the CPU's idle
> duration is updated whenever a task moves to an idle CPU.
>
> The rq->idle_stamp is only cleared during wakeups. This leaves other paths
> that move a task to an idle CPU, such as fork, exec, or load balancing
> migrations, unable to end the CPU's idle status in the scheduler's view.
> This architectural gap produces stale avg_idle values, misleading the
> new idle balancer into incorrectly skipping task migrations and degrading
> overall throughput on high core count systems.
>
> v7--> v8:
>     Remove the 'if (rq->idle_stamp)' condition check in
>     update_rq_avg_idle().
>     --v7:https://lkml.org/lkml/2025/12/26/90
>
> v6--> v7:
>     Call update_rq_avg_idle() from put_prev_task_idle().
>     Drop patch 1 of the original patch set.
>    --v6:https://lkml.org/lkml/2025/12/9/377
>
> v5--> v6:
>     Remove "this_rq->idle_stamp = 0;" in patch 1.
>     Update the test result with Specjbb.
>    --v5:https://lkml.org/lkml/2025/12/3/179
>
> v4--> v5:
>     Modify the changelog.
>
>    --v4:https://lkml.org/lkml/2025/11/28/300
>
> v3--> v4:
>      Remove the code for delayed tasks.
>
>    --v3: https://lkml.org/lkml/2025/11/27/456
>
> v2--> v3:
>   -- merge patch 3 into patch 2:
>       move update_rq_avg_idle() to enqueue_task().
>
>    --v2: https://lkml.org/lkml/2025/11/27/214
>
> v1--> v2:
>   -- Put update_rq_avg_idle() in activate_task().
>   -- Add a check for delayed-dequeue tasks.
>
>    --v1: https://lkml.org/lkml/2025/11/24/97
>
> kernel/sched/core.c  | 24 ++++++++++++------------
> kernel/sched/idle.c  |  1 +
> kernel/sched/sched.h |  1 +
> 3 files changed, 14 insertions(+), 12 deletions(-)
> --
> 2.52.0
>
> sched/core: update rq->avg_idle when a task is moved to an idle CPU
> ---
>  kernel/sched/core.c  | 24 ++++++++++++------------
>  kernel/sched/idle.c  |  1 +
>  kernel/sched/sched.h |  1 +
>  3 files changed, 14 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 045f83ad261e25283d290fd064ad47cd2399dc79..81a841e22c961ff04ad291eeeed81147fd464324 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3607,6 +3607,18 @@ static inline void ttwu_do_wakeup(struct task_struct *p)
>         trace_sched_wakeup(p);
>  }
>
> +void update_rq_avg_idle(struct rq *rq)
> +{
> +       u64 delta = rq_clock(rq) - rq->idle_stamp;
> +       u64 max = 2*rq->max_idle_balance_cost;
> +
> +       update_avg(&rq->avg_idle, delta);
> +
> +       if (rq->avg_idle > max)
> +               rq->avg_idle = max;
> +       rq->idle_stamp = 0;
> +}
> +
>  static void
>  ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
>                  struct rq_flags *rf)
> @@ -3642,18 +3654,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
>                 p->sched_class->task_woken(rq, p);
>                 rq_repin_lock(rq, rf);
>         }
> -
> -       if (rq->idle_stamp) {
> -               u64 delta = rq_clock(rq) - rq->idle_stamp;
> -               u64 max = 2*rq->max_idle_balance_cost;
> -
> -               update_avg(&rq->avg_idle, delta);
> -
> -               if (rq->avg_idle > max)
> -                       rq->avg_idle = max;
> -
> -               rq->idle_stamp = 0;
> -       }
>  }
>
>  /*
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index c174afe1dd177a22535417be0de1fc1b690c0368..36ddc5bcfa0383bd4d07d3c8b732ee5b8567d194 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -460,6 +460,7 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct t
>  {
>         update_curr_idle(rq);
>         scx_update_idle(rq, false, true);
> +       update_rq_avg_idle(rq);
>  }
>
>  static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 93fce4bbff5eac1d4719394e89dfae886b74d865..7edf8600f2c3f45afa32bc73db2155ea6e0067f0 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1676,6 +1676,7 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
>
>  #endif /* !CONFIG_FAIR_GROUP_SCHED */
>
> +extern void update_rq_avg_idle(struct rq *rq);
>  extern void update_rq_clock(struct rq *rq);
>
>  /*
>
> ---
> base-commit: 24d479d26b25bce5faea3ddd9fa8f3a6c3129ea7
> change-id: 20260116-v8-patch-series-5ff91b821cd4
>
> Best regards,
> --
> Shubhang Kaushik <shubhang@os.amperecomputing.com>
>