The ::avg_vruntime field is a misnomer: it says it's an
'average vruntime', but in reality it's the momentary sum
of the weighted vruntimes of all queued tasks, which is
at least a division away from being an average.
This is clear from comments about the math of fair scheduling:
* \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
This confusion is increased by the cfs_avg_vruntime() function,
which does perform the division and returns a true average.
The sum of all weighted vruntimes should be named thusly,
so rename the field to ::sum_w_vruntime. (As arguably
::sum_weighted_vruntime would be a bit of a mouthful.)
Understanding the scheduler is hard enough already, without
extra layers of obfuscated naming. ;-)
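For illustration, the sum/average relationship the rename makes explicit is
roughly the following (a sketch, ignoring the current task's contribution
that cfs_avg_vruntime() also folds in before dividing):

	s64 sum   = cfs_rq->sum_w_vruntime;	/* \Sum (v_i - v0) * w_i */
	long load = cfs_rq->sum_weight;		/* \Sum w_i */
	u64 avg   = cfs_rq->zero_vruntime + div_s64(sum, load);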
Also rename related helper functions:
avg_vruntime_add()    => sum_w_vruntime_add()
avg_vruntime_sub()    => sum_w_vruntime_sub()
avg_vruntime_update() => sum_w_vruntime_update()
With the notable exception of cfs_avg_vruntime(), which
was named accurately.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
kernel/sched/debug.c | 2 +-
kernel/sched/fair.c | 26 +++++++++++++-------------
kernel/sched/sched.h | 2 +-
3 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index a6ceda12bd35..b6fa5ca6a932 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -828,7 +828,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
SPLIT_NS(left_vruntime));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "zero_vruntime",
SPLIT_NS(zero_vruntime));
- SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "avg_vruntime",
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "sum_w_vruntime",
SPLIT_NS(cfs_avg_vruntime(cfs_rq)));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "right_vruntime",
SPLIT_NS(right_vruntime));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ffd52a2e7a0..41ede30b74cd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -607,7 +607,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Which we track using:
*
* v0 := cfs_rq->zero_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+ * \Sum (v_i - v0) * w_i := cfs_rq->sum_w_vruntime
* \Sum w_i := cfs_rq->sum_weight
*
* Since zero_vruntime closely tracks the per-task service, these
@@ -619,32 +619,32 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* As measured, the max (key * weight) value was ~44 bits for a kernel build.
*/
static void
-avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+sum_w_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime += key * weight;
+ cfs_rq->sum_w_vruntime += key * weight;
cfs_rq->sum_weight += weight;
}
static void
-avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+sum_w_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime -= key * weight;
+ cfs_rq->sum_w_vruntime -= key * weight;
cfs_rq->sum_weight -= weight;
}
static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void sum_w_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
+ * v' = v + d ==> sum_w_vruntime' = sum_runtime - d*sum_weight
*/
- cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
+ cfs_rq->sum_w_vruntime -= cfs_rq->sum_weight * delta;
}
/*
@@ -654,7 +654,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
u64 cfs_avg_vruntime(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
+ s64 avg = cfs_rq->sum_w_vruntime;
long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
@@ -722,7 +722,7 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
+ s64 avg = cfs_rq->sum_w_vruntime;
long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
@@ -745,7 +745,7 @@ static void update_zero_vruntime(struct cfs_rq *cfs_rq)
u64 vruntime = cfs_avg_vruntime(cfs_rq);
s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime);
- avg_vruntime_update(cfs_rq, delta);
+ sum_w_vruntime_update(cfs_rq, delta);
cfs_rq->zero_vruntime = vruntime;
}
@@ -819,7 +819,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
*/
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- avg_vruntime_add(cfs_rq, se);
+ sum_w_vruntime_add(cfs_rq, se);
update_zero_vruntime(cfs_rq);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
@@ -831,7 +831,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
- avg_vruntime_sub(cfs_rq, se);
+ sum_w_vruntime_sub(cfs_rq, se);
update_zero_vruntime(cfs_rq);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 54994d93958a..f0eb58458ff3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -678,7 +678,7 @@ struct cfs_rq {
unsigned int h_nr_runnable; /* SCHED_{NORMAL,BATCH,IDLE} */
unsigned int h_nr_idle; /* SCHED_IDLE */
- s64 avg_vruntime;
+ s64 sum_w_vruntime;
u64 sum_weight;
u64 zero_vruntime;
--
2.51.0
On Mon, Dec 01, 2025 at 07:46:47AM +0100, Ingo Molnar wrote:
> The ::avg_vruntime field is a misnomer: it says it's an
> 'average vruntime', but in reality it's the momentary sum
> of the weighted vruntimes of all queued tasks, which is
> at least a division away from being an average.
>
> This is clear from comments about the math of fair scheduling:
>
>  * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
>
> This confusion is increased by the cfs_avg_vruntime() function,
> which does perform the division and returns a true average.
>
> The sum of all weighted vruntimes should be named thusly,
> so rename the field to ::sum_w_vruntime. (As arguably
> ::sum_weighted_vruntime would be a bit of a mouthful.)
>
> Understanding the scheduler is hard enough already, without
> extra layers of obfuscated naming. ;-)
>
> Also rename related helper functions:
>
>   avg_vruntime_add()    => sum_w_vruntime_add()
>   avg_vruntime_sub()    => sum_w_vruntime_sub()
>   avg_vruntime_update() => sum_w_vruntime_update()

So vruntime := runtime / w, so w*vruntime is runtime again. I'm sure
there's something there.

/me runs.

But yeah no arguments this naming needs help.

> With the notable exception of cfs_avg_vruntime(), which
> was named accurately.

But the old avg_vruntime() name was good too!
* Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, Dec 01, 2025 at 07:46:47AM +0100, Ingo Molnar wrote:
> > The ::avg_vruntime field is a misnomer: it says it's an
> > 'average vruntime', but in reality it's the momentary sum
> > of the weighted vruntimes of all queued tasks, which is
> > at least a division away from being an average.
> >
> > This is clear from comments about the math of fair scheduling:
> >
> >  * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
> >
> > This confusion is increased by the cfs_avg_vruntime() function,
> > which does perform the division and returns a true average.
> >
> > The sum of all weighted vruntimes should be named thusly,
> > so rename the field to ::sum_w_vruntime. (As arguably
> > ::sum_weighted_vruntime would be a bit of a mouthful.)
> >
> > Understanding the scheduler is hard enough already, without
> > extra layers of obfuscated naming. ;-)
> >
> > Also rename related helper functions:
> >
> >   avg_vruntime_add()    => sum_w_vruntime_add()
> >   avg_vruntime_sub()    => sum_w_vruntime_sub()
> >   avg_vruntime_update() => sum_w_vruntime_update()
>
> So vruntime := runtime / w, so w*vruntime is runtime again. I'm sure
> there's something there.

Haha, yes. It's delta_exec all the way down, and turtles.

> /me runs.
> But yeah no arguments this naming needs help.
>
> > With the notable exception of cfs_avg_vruntime(), which
> > was named accurately.
>
> But the old avg_vruntime() name was good too!

Yeah. It's now restored in its old glory. :-)

Thanks,

	Ingo
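For reference, the identity being joked about, written out in the notation of
the fair.c comments (a rough sketch; calc_delta_fair() is the helper that
applies the scaling, with NICE_0_LOAD as the unit weight):

	/*
	 * vruntime advances as weight-scaled runtime:
	 *
	 *	v_i += delta_exec * NICE_0_LOAD / w_i	(cf. calc_delta_fair())
	 *
	 * so, up to the constant NICE_0_LOAD factor:
	 *
	 *	w_i * v_i ~= \Sum delta_exec		(runtime again)
	 */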
The following commit has been merged into the sched/core branch of tip:
Commit-ID: dcbc9d3f0e594223275a18f7016001889ad35eff
Gitweb: https://git.kernel.org/tip/dcbc9d3f0e594223275a18f7016001889ad35eff
Author: Ingo Molnar <mingo@kernel.org>
AuthorDate: Tue, 02 Dec 2025 16:09:23 +01:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Mon, 15 Dec 2025 07:52:44 +01:00
sched/fair: Rename cfs_rq::avg_vruntime to ::sum_w_vruntime, and helper functions
The ::avg_vruntime field is a misnomer: it says it's an
'average vruntime', but in reality it's the momentary sum
of the weighted vruntimes of all queued tasks, which is
at least a division away from being an average.
This is clear from comments about the math of fair scheduling:
* \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
This confusion is increased by the cfs_avg_vruntime() function,
which does perform the division and returns a true average.
The sum of all weighted vruntimes should be named thusly,
so rename the field to ::sum_w_vruntime. (As arguably
::sum_weighted_vruntime would be a bit of a mouthful.)
Understanding the scheduler is hard enough already, without
extra layers of obfuscated naming. ;-)
Also rename related helper functions:
avg_vruntime_add()    => sum_w_vruntime_add()
avg_vruntime_sub()    => sum_w_vruntime_sub()
avg_vruntime_update() => sum_w_vruntime_update()
With the notable exception of cfs_avg_vruntime(), which
was named accurately.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251201064647.1851919-7-mingo@kernel.org
---
kernel/sched/fair.c | 26 +++++++++++++-------------
kernel/sched/sched.h | 2 +-
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 65b1065..dcbd995 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -607,7 +607,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Which we track using:
*
* v0 := cfs_rq->zero_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+ * \Sum (v_i - v0) * w_i := cfs_rq->sum_w_vruntime
* \Sum w_i := cfs_rq->sum_weight
*
* Since zero_vruntime closely tracks the per-task service, these
@@ -619,32 +619,32 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* As measured, the max (key * weight) value was ~44 bits for a kernel build.
*/
static void
-avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+sum_w_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime += key * weight;
+ cfs_rq->sum_w_vruntime += key * weight;
cfs_rq->sum_weight += weight;
}
static void
-avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+sum_w_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime -= key * weight;
+ cfs_rq->sum_w_vruntime -= key * weight;
cfs_rq->sum_weight -= weight;
}
static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void sum_w_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
+ * v' = v + d ==> sum_w_vruntime' = sum_runtime - d*sum_weight
*/
- cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
+ cfs_rq->sum_w_vruntime -= cfs_rq->sum_weight * delta;
}
/*
@@ -654,7 +654,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
+ s64 avg = cfs_rq->sum_w_vruntime;
long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
@@ -722,7 +722,7 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
+ s64 avg = cfs_rq->sum_w_vruntime;
long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
@@ -745,7 +745,7 @@ static void update_zero_vruntime(struct cfs_rq *cfs_rq)
u64 vruntime = avg_vruntime(cfs_rq);
s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime);
- avg_vruntime_update(cfs_rq, delta);
+ sum_w_vruntime_update(cfs_rq, delta);
cfs_rq->zero_vruntime = vruntime;
}
@@ -819,7 +819,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
*/
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- avg_vruntime_add(cfs_rq, se);
+ sum_w_vruntime_add(cfs_rq, se);
update_zero_vruntime(cfs_rq);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
@@ -831,7 +831,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
- avg_vruntime_sub(cfs_rq, se);
+ sum_w_vruntime_sub(cfs_rq, se);
update_zero_vruntime(cfs_rq);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3334aa5..ab1bfa0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -678,7 +678,7 @@ struct cfs_rq {
unsigned int h_nr_runnable; /* SCHED_{NORMAL,BATCH,IDLE} */
unsigned int h_nr_idle; /* SCHED_IDLE */
- s64 avg_vruntime;
+ s64 sum_w_vruntime;
u64 sum_weight;
u64 zero_vruntime;
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 8e9aa35061d3fc0b5ab4e9b6247a0dfa600212eb
Gitweb: https://git.kernel.org/tip/8e9aa35061d3fc0b5ab4e9b6247a0dfa600212eb
Author: Ingo Molnar <mingo@kernel.org>
AuthorDate: Tue, 02 Dec 2025 16:09:23 +01:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Sun, 14 Dec 2025 08:25:03 +01:00
sched/fair: Rename cfs_rq::avg_vruntime to ::sum_w_vruntime, and helper functions
The ::avg_vruntime field is a misnomer: it says it's an
'average vruntime', but in reality it's the momentary sum
of the weighted vruntimes of all queued tasks, which is
at least a division away from being an average.
This is clear from comments about the math of fair scheduling:
* \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
This confusion is increased by the cfs_avg_vruntime() function,
which does perform the division and returns a true average.
The sum of all weighted vruntimes should be named thusly,
so rename the field to ::sum_w_vruntime. (As arguably
::sum_weighted_vruntime would be a bit of a mouthful.)
Understanding the scheduler is hard enough already, without
extra layers of obfuscated naming. ;-)
Also rename related helper functions:
avg_vruntime_add()    => sum_w_vruntime_add()
avg_vruntime_sub()    => sum_w_vruntime_sub()
avg_vruntime_update() => sum_w_vruntime_update()
With the notable exception of cfs_avg_vruntime(), which
was named accurately.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251201064647.1851919-7-mingo@kernel.org
---
kernel/sched/fair.c | 26 +++++++++++++-------------
kernel/sched/sched.h | 2 +-
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8e21a95..050ed08 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -607,7 +607,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Which we track using:
*
* v0 := cfs_rq->zero_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+ * \Sum (v_i - v0) * w_i := cfs_rq->sum_w_vruntime
* \Sum w_i := cfs_rq->sum_weight
*
* Since zero_vruntime closely tracks the per-task service, these
@@ -619,32 +619,32 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* As measured, the max (key * weight) value was ~44 bits for a kernel build.
*/
static void
-avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+sum_w_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime += key * weight;
+ cfs_rq->sum_w_vruntime += key * weight;
cfs_rq->sum_weight += weight;
}
static void
-avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+sum_w_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime -= key * weight;
+ cfs_rq->sum_w_vruntime -= key * weight;
cfs_rq->sum_weight -= weight;
}
static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void sum_w_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
+ * v' = v + d ==> sum_w_vruntime' = sum_runtime - d*sum_weight
*/
- cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
+ cfs_rq->sum_w_vruntime -= cfs_rq->sum_weight * delta;
}
/*
@@ -654,7 +654,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
+ s64 avg = cfs_rq->sum_w_vruntime;
long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
@@ -722,7 +722,7 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
+ s64 avg = cfs_rq->sum_w_vruntime;
long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
@@ -745,7 +745,7 @@ static void update_zero_vruntime(struct cfs_rq *cfs_rq)
u64 vruntime = avg_vruntime(cfs_rq);
s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime);
- avg_vruntime_update(cfs_rq, delta);
+ sum_w_vruntime_update(cfs_rq, delta);
cfs_rq->zero_vruntime = vruntime;
}
@@ -819,7 +819,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
*/
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- avg_vruntime_add(cfs_rq, se);
+ sum_w_vruntime_add(cfs_rq, se);
update_zero_vruntime(cfs_rq);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
@@ -831,7 +831,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
- avg_vruntime_sub(cfs_rq, se);
+ sum_w_vruntime_sub(cfs_rq, se);
update_zero_vruntime(cfs_rq);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e3e9974..bdb1e74 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -678,7 +678,7 @@ struct cfs_rq {
unsigned int h_nr_runnable; /* SCHED_{NORMAL,BATCH,IDLE} */
unsigned int h_nr_idle; /* SCHED_IDLE */
- s64 avg_vruntime;
+ s64 sum_w_vruntime;
u64 sum_weight;
u64 zero_vruntime;