[PATCH 5/6] sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight

Posted by Ingo Molnar 2 months, 1 week ago
The ::avg_load field is a long-standing misnomer: it says it's an
'average load', but in reality it's the momentary sum of the load
of all currently runnable tasks. We'd have to also perform a
division by nr_running (or use time-decay) to arrive at any sort
of average value.

This is clear from comments about the math of fair scheduling:

    *              \Sum w_i := cfs_rq->avg_load

The sum of all weights is ... the sum of all weights, not
the average of all weights.
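
To illustrate: with two runnable nice-0 tasks (scaled weight 1024
each) the field holds 2048 - a sum. The weighted average that the
scheduler actually wants only appears at use time, by dividing by
this sum - sketched here in the patch's own notation, with V the
weighted average vruntime:

    V  = v0 + \Sum (v_i - v0) * w_i / \Sum w_i
       = v0 + cfs_rq->avg_vruntime / cfs_rq->sum_weight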

To make it doubly confusing, there's also an ::avg_load
in the load-balancing struct sg_lb_stats, which *is* a
true average.

The second part of the field's name is a minor misnomer
as well: it says 'load', and it is indeed a load_weight
structure as it shares code with the load-balancer - but
it's only in an SMP load-balancing context that
load = weight; in the fair scheduling context the primary
purpose is the weighting of different nice levels.
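
For reference, that nice-level weighting comes from the
sched_prio_to_weight[] table in kernel/sched/core.c, where each
nice step scales the weight by roughly 1.25x - an illustrative
excerpt (see the source for the authoritative values):

    nice -20: 88761    nice  -5: 3121    nice  0: 1024
    nice  +5:   335    nice +10:  110    nice +19:   15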

So rename the field to ::sum_weight instead, which makes
the terminology of the EEVDF math match up with our
implementation of it:

    *              \Sum w_i := cfs_rq->sum_weight
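
As a minimal illustration of how the two fields cooperate - a
user-space toy model, not kernel code; the toy_* names are made
up for this sketch:

    typedef unsigned long long u64;
    typedef long long s64;

    /*
     * ::sum_weight is maintained as a plain sum on enqueue;
     * the weighted-average vruntime only appears at read time.
     */
    struct toy_rq {
            u64 zero_vruntime;  /* v0 */
            s64 avg_vruntime;   /* \Sum (v_i - v0) * w_i */
            u64 sum_weight;     /* \Sum w_i */
    };

    static void toy_enqueue(struct toy_rq *rq, u64 v, unsigned long w)
    {
            rq->avg_vruntime += (s64)(v - rq->zero_vruntime) * (s64)w;
            rq->sum_weight   += w;  /* a sum, not an average */
    }

    static u64 toy_avg_vruntime(struct toy_rq *rq)
    {
            /* The division happens here, and only here
             * (assumes a non-empty queue): */
            return rq->zero_vruntime +
                   rq->avg_vruntime / (s64)rq->sum_weight;
    }

E.g. enqueueing two weight-1024 entities, one at v0 and one 6
units past it, yields sum_weight == 2048 and toy_avg_vruntime()
== v0 + 3.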

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c  | 16 ++++++++--------
 kernel/sched/sched.h |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3d6d551168aa..2ffd52a2e7a0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -608,7 +608,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  *                    v0 := cfs_rq->zero_vruntime
  * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- *              \Sum w_i := cfs_rq->avg_load
+ *              \Sum w_i := cfs_rq->sum_weight
  *
  * Since zero_vruntime closely tracks the per-task service, these
  * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
@@ -625,7 +625,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	s64 key = entity_key(cfs_rq, se);
 
 	cfs_rq->avg_vruntime += key * weight;
-	cfs_rq->avg_load += weight;
+	cfs_rq->sum_weight += weight;
 }
 
 static void
@@ -635,16 +635,16 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	s64 key = entity_key(cfs_rq, se);
 
 	cfs_rq->avg_vruntime -= key * weight;
-	cfs_rq->avg_load -= weight;
+	cfs_rq->sum_weight -= weight;
 }
 
 static inline
 void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
 {
 	/*
-	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
 	 */
-	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+	cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
 }
 
 /*
@@ -655,7 +655,7 @@ u64 cfs_avg_vruntime(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
+	long load = cfs_rq->sum_weight;
 
 	if (curr && curr->on_rq) {
 		unsigned long weight = scale_load_down(curr->load.weight);
@@ -723,7 +723,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
+	long load = cfs_rq->sum_weight;
 
 	if (curr && curr->on_rq) {
 		unsigned long weight = scale_load_down(curr->load.weight);
@@ -5172,7 +5172,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		 *
 		 *   vl_i = (W + w_i)*vl'_i / W
 		 */
-		load = cfs_rq->avg_load;
+		load = cfs_rq->sum_weight;
 		if (curr && curr->on_rq)
 			load += scale_load_down(curr->load.weight);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47f7b6df634c..54994d93958a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -679,7 +679,7 @@ struct cfs_rq {
 	unsigned int		h_nr_idle;		/* SCHED_IDLE */
 
 	s64			avg_vruntime;
-	u64			avg_load;
+	u64			sum_weight;
 
 	u64			zero_vruntime;
 #ifdef CONFIG_SCHED_CORE
-- 
2.51.0
Re: [PATCH 5/6] sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight
Posted by Peter Zijlstra 2 months ago
On Mon, Dec 01, 2025 at 07:46:46AM +0100, Ingo Molnar wrote:
> The ::avg_load field is a long-standing misnomer: it says it's an
> 'average load', but in reality it's the momentary sum of the load
> of all currently runnable tasks. We'd have to also perform a
> division by nr_running (or use time-decay) to arrive at any sort
> of average value.
> 
> This is clear from comments about the math of fair scheduling:
> 
>     *              \Sum w_i := cfs_rq->avg_load
> 
> The sum of all weights is ... the sum of all weights, not
> the average of all weights.
> 
> To make it doubly confusing, there's also an ::avg_load
> in the load-balancing struct sg_lb_stats, which *is* a
> true average.
> 
> The second part of the field's name is a minor misnomer
> as well: it says 'load', and it is indeed a load_weight
> structure as it shares code with the load-balancer - but
> it's only in an SMP load-balancing context that
> load = weight; in the fair scheduling context the primary
> purpose is the weighting of different nice levels.
> 
> So rename the field to ::sum_weight instead, which makes
> the terminology of the EEVDF math match up with our
> implementation of it:
> 
>     *              \Sum w_i := cfs_rq->sum_weight
> 
> Signed-off-by: Ingo Molnar <mingo@kernel.org>

Bah, this is going to be a pain rebasing for me, but yes, these
variables are poorly named. 'sum_weight' is a better name.

Re: [PATCH 5/6] sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight
Posted by Ingo Molnar 2 months ago
* Peter Zijlstra <peterz@infradead.org> wrote:

> Bah, this is going to be a pain rebasing for me, but yes, these
> variables are poorly named. 'sum_weight' is a better name.

Fair enough, and to make this easier for you I've 
rebased your worst-affected tree (queue.git:sched/flat) 
on top of the mingo/tip:WIP.sched/core-for-v6.20 tree, 
which includes these renames (with all your feedback 
addressed AFAICT), see:

  git://git.kernel.org/pub/scm/linux/kernel/git/mingo/tip.git WIP.sched/flat

... and it builds and boots. :-)

Thanks,

	Ingo
[tip: sched/core] sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight
Posted by tip-bot2 for Ingo Molnar 1 month, 3 weeks ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     4ff674fa986c27ec8a0542479258c92d361a2566
Gitweb:        https://git.kernel.org/tip/4ff674fa986c27ec8a0542479258c92d361a2566
Author:        Ingo Molnar <mingo@kernel.org>
AuthorDate:    Wed, 26 Nov 2025 12:09:16 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Mon, 15 Dec 2025 07:52:44 +01:00

sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight

The ::avg_load field is a long-standing misnomer: it says it's an
'average load', but in reality it's the momentary sum of the load
of all currently runnable tasks. We'd have to also perform a
division by nr_running (or use time-decay) to arrive at any sort
of average value.

This is clear from comments about the math of fair scheduling:

    *              \Sum w_i := cfs_rq->avg_load

The sum of all weights is ... the sum of all weights, not
the average of all weights.

To make it doubly confusing, there's also an ::avg_load
in the load-balancing struct sg_lb_stats, which *is* a
true average.

The second part of the field's name is a minor misnomer
as well: it says 'load', and it is indeed a load_weight
structure as it shares code with the load-balancer - but
it's only in an SMP load-balancing context that
load = weight; in the fair scheduling context the primary
purpose is the weighting of different nice levels.

So rename the field to ::sum_weight instead, which makes
the terminology of the EEVDF math match up with our
implementation of it:

    *              \Sum w_i := cfs_rq->sum_weight

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251201064647.1851919-6-mingo@kernel.org
---
 kernel/sched/fair.c  | 16 ++++++++--------
 kernel/sched/sched.h |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f79951f..65b1065 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -608,7 +608,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  *                    v0 := cfs_rq->zero_vruntime
  * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- *              \Sum w_i := cfs_rq->avg_load
+ *              \Sum w_i := cfs_rq->sum_weight
  *
  * Since zero_vruntime closely tracks the per-task service, these
  * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
@@ -625,7 +625,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	s64 key = entity_key(cfs_rq, se);
 
 	cfs_rq->avg_vruntime += key * weight;
-	cfs_rq->avg_load += weight;
+	cfs_rq->sum_weight += weight;
 }
 
 static void
@@ -635,16 +635,16 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	s64 key = entity_key(cfs_rq, se);
 
 	cfs_rq->avg_vruntime -= key * weight;
-	cfs_rq->avg_load -= weight;
+	cfs_rq->sum_weight -= weight;
 }
 
 static inline
 void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
 {
 	/*
-	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
 	 */
-	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+	cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
 }
 
 /*
@@ -655,7 +655,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
+	long load = cfs_rq->sum_weight;
 
 	if (curr && curr->on_rq) {
 		unsigned long weight = scale_load_down(curr->load.weight);
@@ -723,7 +723,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
+	long load = cfs_rq->sum_weight;
 
 	if (curr && curr->on_rq) {
 		unsigned long weight = scale_load_down(curr->load.weight);
@@ -5131,7 +5131,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		 *
 		 *   vl_i = (W + w_i)*vl'_i / W
 		 */
-		load = cfs_rq->avg_load;
+		load = cfs_rq->sum_weight;
 		if (curr && curr->on_rq)
 			load += scale_load_down(curr->load.weight);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 82522c9..3334aa5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -679,7 +679,7 @@ struct cfs_rq {
 	unsigned int		h_nr_idle;		/* SCHED_IDLE */
 
 	s64			avg_vruntime;
-	u64			avg_load;
+	u64			sum_weight;
 
 	u64			zero_vruntime;
 #ifdef CONFIG_SCHED_CORE
[tip: sched/core] sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight
Posted by tip-bot2 for Ingo Molnar 1 month, 3 weeks ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     969c658869ff1c3998a449d2602c68b1d4b1ce06
Gitweb:        https://git.kernel.org/tip/969c658869ff1c3998a449d2602c68b1d4b1ce06
Author:        Ingo Molnar <mingo@kernel.org>
AuthorDate:    Wed, 26 Nov 2025 12:09:16 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Sun, 14 Dec 2025 08:25:03 +01:00

sched/fair: Rename cfs_rq::avg_load to cfs_rq::sum_weight

The ::avg_load field is a long-standing misnomer: it says it's an
'average load', but in reality it's the momentary sum of the load
of all currently runnable tasks. We'd have to also perform a
division by nr_running (or use time-decay) to arrive at any sort
of average value.

This is clear from comments about the math of fair scheduling:

    *              \Sum w_i := cfs_rq->avg_load

The sum of all weights is ... the sum of all weights, not
the average of all weights.

To make it doubly confusing, there's also an ::avg_load
in the load-balancing struct sg_lb_stats, which *is* a
true average.

The second part of the field's name is a minor misnomer
as well: it says 'load', and it is indeed a load_weight
structure as it shares code with the load-balancer - but
it's only in an SMP load-balancing context that
load = weight; in the fair scheduling context the primary
purpose is the weighting of different nice levels.

So rename the field to ::sum_weight instead, which makes
the terminology of the EEVDF math match up with our
implementation of it:

    *              \Sum w_i := cfs_rq->sum_weight

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://patch.msgid.link/20251201064647.1851919-6-mingo@kernel.org
---
 kernel/sched/fair.c  | 16 ++++++++--------
 kernel/sched/sched.h |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea276d8..8e21a95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -608,7 +608,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  *                    v0 := cfs_rq->zero_vruntime
  * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- *              \Sum w_i := cfs_rq->avg_load
+ *              \Sum w_i := cfs_rq->sum_weight
  *
  * Since zero_vruntime closely tracks the per-task service, these
  * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
@@ -625,7 +625,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	s64 key = entity_key(cfs_rq, se);
 
 	cfs_rq->avg_vruntime += key * weight;
-	cfs_rq->avg_load += weight;
+	cfs_rq->sum_weight += weight;
 }
 
 static void
@@ -635,16 +635,16 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	s64 key = entity_key(cfs_rq, se);
 
 	cfs_rq->avg_vruntime -= key * weight;
-	cfs_rq->avg_load -= weight;
+	cfs_rq->sum_weight -= weight;
 }
 
 static inline
 void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
 {
 	/*
-	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*sum_weight
 	 */
-	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+	cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
 }
 
 /*
@@ -655,7 +655,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
+	long load = cfs_rq->sum_weight;
 
 	if (curr && curr->on_rq) {
 		unsigned long weight = scale_load_down(curr->load.weight);
@@ -723,7 +723,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
+	long load = cfs_rq->sum_weight;
 
 	if (curr && curr->on_rq) {
 		unsigned long weight = scale_load_down(curr->load.weight);
@@ -5131,7 +5131,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		 *
 		 *   vl_i = (W + w_i)*vl'_i / W
 		 */
-		load = cfs_rq->avg_load;
+		load = cfs_rq->sum_weight;
 		if (curr && curr->on_rq)
 			load += scale_load_down(curr->load.weight);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4ddb755..e3e9974 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -679,7 +679,7 @@ struct cfs_rq {
 	unsigned int		h_nr_idle;		/* SCHED_IDLE */
 
 	s64			avg_vruntime;
-	u64			avg_load;
+	u64			sum_weight;
 
 	u64			zero_vruntime;
 #ifdef CONFIG_SCHED_CORE