The ::avg_load field is a long-standing misnomer: it says it's an
'average load', but in reality it's the momentary sum of the load
of all currently runnable tasks. We'd have to also perform a
division by nr_running (or use time-decay) to arrive at any sort
of average value.
This is clear from comments about the math of fair scheduling:
* \Sum w_i := cfs_rq->avg_load
The sum of all weights is ... the sum of all weights, not
the average of all weights.
To make it doubly confusing, there's also an ::avg_load
in the load-balancing struct sg_lb_stats, which *is* a
true average.
The second part of the field's name is a minor misnomer
as well: it says 'load', and it is indeed a load_weight
structure as it shares code with the load-balancer - but
it's only in an SMP load-balancing context where
load = weight; in the fair scheduling context, the primary
purpose is the weighting of different nice levels.
So rename the field to ::sum_weight instead, which makes
the terminology of the EEVDF math match up with our
implementation of it:
* \Sum w_i := cfs_rq->sum_weight
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
kernel/sched/fair.c | 16 ++++++++--------
kernel/sched/sched.h | 2 +-
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3d6d551168aa..2ffd52a2e7a0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -608,7 +608,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* v0 := cfs_rq->zero_vruntime
* \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- * \Sum w_i := cfs_rq->avg_load
+ * \Sum w_i := cfs_rq->sum_weight
*
* Since zero_vruntime closely tracks the per-task service, these
* deltas: (v_i - v), will be in the order of the maximal (virtual) lag
@@ -625,7 +625,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
s64 key = entity_key(cfs_rq, se);
cfs_rq->avg_vruntime += key * weight;
- cfs_rq->avg_load += weight;
+ cfs_rq->sum_weight += weight;
}
static void
@@ -635,16 +635,16 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
s64 key = entity_key(cfs_rq, se);
cfs_rq->avg_vruntime -= key * weight;
- cfs_rq->avg_load -= weight;
+ cfs_rq->sum_weight -= weight;
}
static inline
void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+ * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
*/
- cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+ cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
}
/*
@@ -655,7 +655,7 @@ u64 cfs_avg_vruntime(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
+ long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
unsigned long weight = scale_load_down(curr->load.weight);
@@ -723,7 +723,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
struct sched_entity *curr = cfs_rq->curr;
s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
+ long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
unsigned long weight = scale_load_down(curr->load.weight);
@@ -5172,7 +5172,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* vl_i = (W + w_i)*vl'_i / W
*/
- load = cfs_rq->avg_load;
+ load = cfs_rq->sum_weight;
if (curr && curr->on_rq)
load += scale_load_down(curr->load.weight);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47f7b6df634c..54994d93958a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -679,7 +679,7 @@ struct cfs_rq {
unsigned int h_nr_idle; /* SCHED_IDLE */
s64 avg_vruntime;
- u64 avg_load;
+ u64 sum_weight;
u64 zero_vruntime;
#ifdef CONFIG_SCHED_CORE
--
2.51.0
On Mon, Dec 01, 2025 at 07:46:46AM +0100, Ingo Molnar wrote:
> The ::avg_load field is a long-standing misnomer: it says it's an
> 'average load', but in reality it's the momentary sum of the load
> of all currently runnable tasks. We'd have to also perform a
> division by nr_running (or use time-decay) to arrive at any sort
> of average value.
>
> This is clear from comments about the math of fair scheduling:
>
> * \Sum w_i := cfs_rq->avg_load
>
> The sum of all weights is ... the sum of all weights, not
> the average of all weights.
>
> To make it doubly confusing, there's also an ::avg_load
> in the load-balancing struct sg_lb_stats, which *is* a
> true average.
>
> The second part of the field's name is a minor misnomer
> as well: it says 'load', and it is indeed a load_weight
> structure as it shares code with the load-balancer - but
> it's only in an SMP load-balancing context where
> load = weight, in the fair scheduling context the primary
> purpose is the weighting of different nice levels.
>
> So rename the field to ::sum_weight instead, which makes
> the terminology of the EEVDF math match up with our
> implementation of it:
>
> * \Sum w_i := cfs_rq->sum_weight
>
> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Bah, this is going to be a pain rebasing for me, but yes, these
variables are poorly named. 'sum_weight' is a better name.
> ---
> kernel/sched/fair.c | 16 ++++++++--------
> kernel/sched/sched.h | 2 +-
> 2 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 3d6d551168aa..2ffd52a2e7a0 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -608,7 +608,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
> *
> * v0 := cfs_rq->zero_vruntime
> * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
> - * \Sum w_i := cfs_rq->avg_load
> + * \Sum w_i := cfs_rq->sum_weight
> *
> * Since zero_vruntime closely tracks the per-task service, these
> * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
> @@ -625,7 +625,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
> s64 key = entity_key(cfs_rq, se);
>
> cfs_rq->avg_vruntime += key * weight;
> - cfs_rq->avg_load += weight;
> + cfs_rq->sum_weight += weight;
> }
>
> static void
> @@ -635,16 +635,16 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
> s64 key = entity_key(cfs_rq, se);
>
> cfs_rq->avg_vruntime -= key * weight;
> - cfs_rq->avg_load -= weight;
> + cfs_rq->sum_weight -= weight;
> }
>
> static inline
> void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
> {
> /*
> - * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
> + * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
> */
> - cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
> + cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
> }
>
> /*
> @@ -655,7 +655,7 @@ u64 cfs_avg_vruntime(struct cfs_rq *cfs_rq)
> {
> struct sched_entity *curr = cfs_rq->curr;
> s64 avg = cfs_rq->avg_vruntime;
> - long load = cfs_rq->avg_load;
> + long load = cfs_rq->sum_weight;
>
> if (curr && curr->on_rq) {
> unsigned long weight = scale_load_down(curr->load.weight);
> @@ -723,7 +723,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
> {
> struct sched_entity *curr = cfs_rq->curr;
> s64 avg = cfs_rq->avg_vruntime;
> - long load = cfs_rq->avg_load;
> + long load = cfs_rq->sum_weight;
>
> if (curr && curr->on_rq) {
> unsigned long weight = scale_load_down(curr->load.weight);
> @@ -5172,7 +5172,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> *
> * vl_i = (W + w_i)*vl'_i / W
> */
> - load = cfs_rq->avg_load;
> + load = cfs_rq->sum_weight;
> if (curr && curr->on_rq)
> load += scale_load_down(curr->load.weight);
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 47f7b6df634c..54994d93958a 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -679,7 +679,7 @@ struct cfs_rq {
> unsigned int h_nr_idle; /* SCHED_IDLE */
>
> s64 avg_vruntime;
> - u64 avg_load;
> + u64 sum_weight;
>
> u64 zero_vruntime;
> #ifdef CONFIG_SCHED_CORE
> --
> 2.51.0
>
* Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, Dec 01, 2025 at 07:46:46AM +0100, Ingo Molnar wrote:
> > The ::avg_load field is a long-standing misnomer: it says it's an
> > 'average load', but in reality it's the momentary sum of the load
> > of all currently runnable tasks. We'd have to also perform a
> > division by nr_running (or use time-decay) to arrive at any sort
> > of average value.
> >
> > This is clear from comments about the math of fair scheduling:
> >
> > * \Sum w_i := cfs_rq->avg_load
> >
> > The sum of all weights is ... the sum of all weights, not
> > the average of all weights.
> >
> > To make it doubly confusing, there's also an ::avg_load
> > in the load-balancing struct sg_lb_stats, which *is* a
> > true average.
> >
> > The second part of the field's name is a minor misnomer
> > as well: it says 'load', and it is indeed a load_weight
> > structure as it shares code with the load-balancer - but
> > it's only in an SMP load-balancing context where
> > load = weight, in the fair scheduling context the primary
> > purpose is the weighting of different nice levels.
> >
> > So rename the field to ::sum_weight instead, which makes
> > the terminology of the EEVDF math match up with our
> > implementation of it:
> >
> > * \Sum w_i := cfs_rq->sum_weight
> >
> > Signed-off-by: Ingo Molnar <mingo@kernel.org>
>
> Bah, this is going to be a pain rebasing for me, but yes, these
> variables are poorly named. 'sum_weight' is a better name.

Fair enough, and to make this easier for you I've rebased your worst
affected tree (queue.git:sched/flat) on top of the
mingo/tip:WIP.sched/core-for-v6.20 tree, which includes these renames
(with all your feedback addressed AFAICT), see:

  git://git.kernel.org/pub/scm/linux/kernel/git/mingo/tip.git WIP.sched/flat

... and it builds and boots. :-)

Thanks,

	Ingo
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 4ff674fa986c27ec8a0542479258c92d361a2566
Gitweb: https://git.kernel.org/tip/4ff674fa986c27ec8a0542479258c92d361a2566
Author: Ingo Molnar <mingo@kernel.org>
AuthorDate: Wed, 26 Nov 2025 12:09:16 +01:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Mon, 15 Dec 2025 07:52:44 +01:00
sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight
The ::avg_load field is a long-standing misnomer: it says it's an
'average load', but in reality it's the momentary sum of the load
of all currently runnable tasks. We'd have to also perform a
division by nr_running (or use time-decay) to arrive at any sort
of average value.
This is clear from comments about the math of fair scheduling:
* \Sum w_i := cfs_rq->avg_load
The sum of all weights is ... the sum of all weights, not
the average of all weights.
To make it doubly confusing, there's also an ::avg_load
in the load-balancing struct sg_lb_stats, which *is* a
true average.
The second part of the field's name is a minor misnomer
as well: it says 'load', and it is indeed a load_weight
structure as it shares code with the load-balancer - but
it's only in an SMP load-balancing context where
load = weight, in the fair scheduling context the primary
purpose is the weighting of different nice levels.
So rename the field to ::sum_weight instead, which makes
the terminology of the EEVDF math match up with our
implementation of it:
* \Sum w_i := cfs_rq->sum_weight
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251201064647.1851919-6-mingo@kernel.org
---
kernel/sched/fair.c | 16 ++++++++--------
kernel/sched/sched.h | 2 +-
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f79951f..65b1065 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -608,7 +608,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* v0 := cfs_rq->zero_vruntime
* \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- * \Sum w_i := cfs_rq->avg_load
+ * \Sum w_i := cfs_rq->sum_weight
*
* Since zero_vruntime closely tracks the per-task service, these
* deltas: (v_i - v), will be in the order of the maximal (virtual) lag
@@ -625,7 +625,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
s64 key = entity_key(cfs_rq, se);
cfs_rq->avg_vruntime += key * weight;
- cfs_rq->avg_load += weight;
+ cfs_rq->sum_weight += weight;
}
static void
@@ -635,16 +635,16 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
s64 key = entity_key(cfs_rq, se);
cfs_rq->avg_vruntime -= key * weight;
- cfs_rq->avg_load -= weight;
+ cfs_rq->sum_weight -= weight;
}
static inline
void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+ * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
*/
- cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+ cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
}
/*
@@ -655,7 +655,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
+ long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
unsigned long weight = scale_load_down(curr->load.weight);
@@ -723,7 +723,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
struct sched_entity *curr = cfs_rq->curr;
s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
+ long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
unsigned long weight = scale_load_down(curr->load.weight);
@@ -5131,7 +5131,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* vl_i = (W + w_i)*vl'_i / W
*/
- load = cfs_rq->avg_load;
+ load = cfs_rq->sum_weight;
if (curr && curr->on_rq)
load += scale_load_down(curr->load.weight);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 82522c9..3334aa5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -679,7 +679,7 @@ struct cfs_rq {
unsigned int h_nr_idle; /* SCHED_IDLE */
s64 avg_vruntime;
- u64 avg_load;
+ u64 sum_weight;
u64 zero_vruntime;
#ifdef CONFIG_SCHED_CORE
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 969c658869ff1c3998a449d2602c68b1d4b1ce06
Gitweb: https://git.kernel.org/tip/969c658869ff1c3998a449d2602c68b1d4b1ce06
Author: Ingo Molnar <mingo@kernel.org>
AuthorDate: Wed, 26 Nov 2025 12:09:16 +01:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Sun, 14 Dec 2025 08:25:03 +01:00
sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight
The ::avg_load field is a long-standing misnomer: it says it's an
'average load', but in reality it's the momentary sum of the load
of all currently runnable tasks. We'd have to also perform a
division by nr_running (or use time-decay) to arrive at any sort
of average value.
This is clear from comments about the math of fair scheduling:
* \Sum w_i := cfs_rq->avg_load
The sum of all weights is ... the sum of all weights, not
the average of all weights.
To make it doubly confusing, there's also an ::avg_load
in the load-balancing struct sg_lb_stats, which *is* a
true average.
The second part of the field's name is a minor misnomer
as well: it says 'load', and it is indeed a load_weight
structure as it shares code with the load-balancer - but
it's only in an SMP load-balancing context where
load = weight, in the fair scheduling context the primary
purpose is the weighting of different nice levels.
So rename the field to ::sum_weight instead, which makes
the terminology of the EEVDF math match up with our
implementation of it:
* \Sum w_i := cfs_rq->sum_weight
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251201064647.1851919-6-mingo@kernel.org
---
kernel/sched/fair.c | 16 ++++++++--------
kernel/sched/sched.h | 2 +-
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea276d8..8e21a95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -608,7 +608,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* v0 := cfs_rq->zero_vruntime
* \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- * \Sum w_i := cfs_rq->avg_load
+ * \Sum w_i := cfs_rq->sum_weight
*
* Since zero_vruntime closely tracks the per-task service, these
* deltas: (v_i - v), will be in the order of the maximal (virtual) lag
@@ -625,7 +625,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
s64 key = entity_key(cfs_rq, se);
cfs_rq->avg_vruntime += key * weight;
- cfs_rq->avg_load += weight;
+ cfs_rq->sum_weight += weight;
}
static void
@@ -635,16 +635,16 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
s64 key = entity_key(cfs_rq, se);
cfs_rq->avg_vruntime -= key * weight;
- cfs_rq->avg_load -= weight;
+ cfs_rq->sum_weight -= weight;
}
static inline
void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+ * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
*/
- cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+ cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
}
/*
@@ -655,7 +655,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
+ long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
unsigned long weight = scale_load_down(curr->load.weight);
@@ -723,7 +723,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
struct sched_entity *curr = cfs_rq->curr;
s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
+ long load = cfs_rq->sum_weight;
if (curr && curr->on_rq) {
unsigned long weight = scale_load_down(curr->load.weight);
@@ -5131,7 +5131,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* vl_i = (W + w_i)*vl'_i / W
*/
- load = cfs_rq->avg_load;
+ load = cfs_rq->sum_weight;
if (curr && curr->on_rq)
load += scale_load_down(curr->load.weight);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4ddb755..e3e9974 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -679,7 +679,7 @@ struct cfs_rq {
unsigned int h_nr_idle; /* SCHED_IDLE */
s64 avg_vruntime;
- u64 avg_load;
+ u64 sum_weight;
u64 zero_vruntime;
#ifdef CONFIG_SCHED_CORE
© 2016 - 2026 Red Hat, Inc.