[PATCH v2 3/6] sched: Report the different kinds of imbalances in /proc/schedstat

Swapnil Sapkal posted 6 patches 1 year, 1 month ago
[PATCH v2 3/6] sched: Report the different kinds of imbalances in /proc/schedstat
Posted by Swapnil Sapkal 1 year, 1 month ago
In /proc/schedstat, lb_imbalance reports the sum of imbalances
discovered in sched domains with each call to sched_balance_rq(), which is
not very useful because lb_imbalance does not mention whether the imbalance
is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
from /proc/schedstat.

Currently there is no field in /proc/schedstat to report different types
of imbalances. Introduce new fields in /proc/schedstat to report the
total imbalances in load, utilization, nr_tasks or misfit_tasks.

Added fields to /proc/schedstat:
        - lb_imbalance_load: Total imbalance due to load.
        - lb_imbalance_util: Total imbalance due to utilization.
        - lb_imbalance_task: Total imbalance due to number of tasks.
        - lb_imbalance_misfit: Total imbalance due to misfit tasks.

Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
---
 include/linux/sched/topology.h |  5 ++++-
 kernel/sched/fair.c            | 24 +++++++++++++++++++++++-
 kernel/sched/stats.c           |  7 +++++--
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 4237daa5ac7a..76a662e1ec24 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -114,7 +114,10 @@ struct sched_domain {
 	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ca3f098552c..5e7e4fe81648 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11699,6 +11699,28 @@ static int should_we_balance(struct lb_env *env)
 	return group_balance_cpu(sg) == env->dst_cpu;
 }
 
+static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
+				     enum cpu_idle_type idle)
+{
+	if (!schedstat_enabled())
+		return;
+
+	switch (env->migration_type) {
+	case migrate_load:
+		__schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
+		break;
+	case migrate_util:
+		__schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
+		break;
+	case migrate_task:
+		__schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
+		break;
+	case migrate_misfit:
+		__schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
+		break;
+	}
+}
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
@@ -11749,7 +11771,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
 
 	WARN_ON_ONCE(busiest == env.dst_rq);
 
-	schedstat_add(sd->lb_imbalance[idle], env.imbalance);
+	update_lb_imbalance_stat(&env, sd, idle);
 
 	env.src_cpu = busiest->cpu;
 	env.src_rq = busiest;
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index eb0cdcd4d921..802bd9398a2e 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq, void *v)
 			seq_printf(seq, "domain%d %*pb", dcount++,
 				   cpumask_pr_args(sched_domain_span(sd)));
 			for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
-				seq_printf(seq, " %u %u %u %u %u %u %u %u",
+				seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
 				    sd->lb_count[itype],
 				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
-				    sd->lb_imbalance[itype],
+				    sd->lb_imbalance_load[itype],
+				    sd->lb_imbalance_util[itype],
+				    sd->lb_imbalance_task[itype],
+				    sd->lb_imbalance_misfit[itype],
 				    sd->lb_gained[itype],
 				    sd->lb_hot_gained[itype],
 				    sd->lb_nobusyq[itype],
-- 
2.43.0
Re: [PATCH v2 3/6] sched: Report the different kinds of imbalances in /proc/schedstat
Posted by Shrikanth Hegde 1 year, 1 month ago

On 12/20/24 12:02, Swapnil Sapkal wrote:
> In /proc/schedstat, lb_imbalance reports the sum of imbalances
> discovered in sched domains with each call to sched_balance_rq(), which is
> not very useful because lb_imbalance does not mention whether the imbalance
> is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
> from /proc/schedstat.
> 
> Currently there is no field in /proc/schedstat to report different types
> of imbalances. Introduce new fields in /proc/schedstat to report the
> total imbalances in load, utilization, nr_tasks or misfit_tasks.
> 
> Added fields to /proc/schedstat:
>          - lb_imbalance_load: Total imbalance due to load.
>          - lb_imbalance_util: Total imbalance due to utilization.
>          - lb_imbalance_task: Total imbalance due to number of tasks.
>          - lb_imbalance_misfit: Total imbalance due to misfit tasks.
> 
> Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>

I think it's better to merge patch 3 and patch 6.

> ---
>   include/linux/sched/topology.h |  5 ++++-
>   kernel/sched/fair.c            | 24 +++++++++++++++++++++++-
>   kernel/sched/stats.c           |  7 +++++--
>   3 files changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
> index 4237daa5ac7a..76a662e1ec24 100644
> --- a/include/linux/sched/topology.h
> +++ b/include/linux/sched/topology.h
> @@ -114,7 +114,10 @@ struct sched_domain {
>   	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
>   	unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
>   	unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
> -	unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
> +	unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
> +	unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
> +	unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
> +	unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
>   	unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
>   	unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
>   	unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2ca3f098552c..5e7e4fe81648 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -11699,6 +11699,28 @@ static int should_we_balance(struct lb_env *env)
>   	return group_balance_cpu(sg) == env->dst_cpu;
>   }
>   
> +static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
> +				     enum cpu_idle_type idle)
> +{
> +	if (!schedstat_enabled())
> +		return;
> +
> +	switch (env->migration_type) {
> +	case migrate_load:
> +		__schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
> +		break;
> +	case migrate_util:
> +		__schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
> +		break;
> +	case migrate_task:
> +		__schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
> +		break;
> +	case migrate_misfit:
> +		__schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
> +		break;
> +	}
> +}
> +
>   /*
>    * Check this_cpu to ensure it is balanced within domain. Attempt to move
>    * tasks if there is an imbalance.
> @@ -11749,7 +11771,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
>   
>   	WARN_ON_ONCE(busiest == env.dst_rq);
>   
> -	schedstat_add(sd->lb_imbalance[idle], env.imbalance);
> +	update_lb_imbalance_stat(&env, sd, idle);
>   
>   	env.src_cpu = busiest->cpu;
>   	env.src_rq = busiest;
> diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
> index eb0cdcd4d921..802bd9398a2e 100644
> --- a/kernel/sched/stats.c
> +++ b/kernel/sched/stats.c
> @@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq, void *v)
>   			seq_printf(seq, "domain%d %*pb", dcount++,
>   				   cpumask_pr_args(sched_domain_span(sd)));
>   			for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
> -				seq_printf(seq, " %u %u %u %u %u %u %u %u",
> +				seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
>   				    sd->lb_count[itype],
>   				    sd->lb_balanced[itype],
>   				    sd->lb_failed[itype],
> -				    sd->lb_imbalance[itype],
> +				    sd->lb_imbalance_load[itype],
> +				    sd->lb_imbalance_util[itype],
> +				    sd->lb_imbalance_task[itype],
> +				    sd->lb_imbalance_misfit[itype],
>   				    sd->lb_gained[itype],
>   				    sd->lb_hot_gained[itype],
>   				    sd->lb_nobusyq[itype],

While you are adding this, please update the 
Documentation/scheduler/sched-stats.rst as well.
Re: [PATCH v2 3/6] sched: Report the different kinds of imbalances in /proc/schedstat
Posted by Shrikanth Hegde 1 year, 1 month ago

On 12/20/24 23:53, Shrikanth Hegde wrote:
> 
> 
> On 12/20/24 12:02, Swapnil Sapkal wrote:
>> In /proc/schedstat, lb_imbalance reports the sum of imbalances
>> discovered in sched domains with each call to sched_balance_rq(), 
>> which is
>> not very useful because lb_imbalance does not mention whether the 
>> imbalance
>> is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
>> from /proc/schedstat.
>>
>> Currently there is no field in /proc/schedstat to report different types
>> of imbalances. Introduce new fields in /proc/schedstat to report the
>> total imbalances in load, utilization, nr_tasks or misfit_tasks.
>>
>> Added fields to /proc/schedstat:
>>          - lb_imbalance_load: Total imbalance due to load.
>>          - lb_imbalance_util: Total imbalance due to utilization.
>>          - lb_imbalance_task: Total imbalance due to number of tasks.
>>          - lb_imbalance_misfit: Total imbalance due to misfit tasks.
>>
>> Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
>> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
> 
> I think it's better to merge patch 3 and patch 6.

Please ignore this comment. Since there is a change in patch 5 which
affects the docs, it is better that patch 6 stays separate. Sorry for the noise.
> 
>> ---
>>   include/linux/sched/topology.h |  5 ++++-
>>   kernel/sched/fair.c            | 24 +++++++++++++++++++++++-
>>   kernel/sched/stats.c           |  7 +++++--
>>   3 files changed, 32 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/sched/topology.h b/include/linux/sched/ 
>> topology.h
>> index 4237daa5ac7a..76a662e1ec24 100644
>> --- a/include/linux/sched/topology.h
>> +++ b/include/linux/sched/topology.h
>> @@ -114,7 +114,10 @@ struct sched_domain {
>>       unsigned int lb_count[CPU_MAX_IDLE_TYPES];
>>       unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
>>       unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
>> -    unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
>> +    unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
>> +    unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
>> +    unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
>> +    unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
>>       unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
>>       unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
>>       unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 2ca3f098552c..5e7e4fe81648 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -11699,6 +11699,28 @@ static int should_we_balance(struct lb_env *env)
>>       return group_balance_cpu(sg) == env->dst_cpu;
>>   }
>> +static void update_lb_imbalance_stat(struct lb_env *env, struct 
>> sched_domain *sd,
>> +                     enum cpu_idle_type idle)
>> +{
>> +    if (!schedstat_enabled())
>> +        return;
>> +
>> +    switch (env->migration_type) {
>> +    case migrate_load:
>> +        __schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
>> +        break;
>> +    case migrate_util:
>> +        __schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
>> +        break;
>> +    case migrate_task:
>> +        __schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
>> +        break;
>> +    case migrate_misfit:
>> +        __schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
>> +        break;
>> +    }
>> +}
>> +
>>   /*
>>    * Check this_cpu to ensure it is balanced within domain. Attempt to 
>> move
>>    * tasks if there is an imbalance.
>> @@ -11749,7 +11771,7 @@ static int sched_balance_rq(int this_cpu, 
>> struct rq *this_rq,
>>       WARN_ON_ONCE(busiest == env.dst_rq);
>> -    schedstat_add(sd->lb_imbalance[idle], env.imbalance);
>> +    update_lb_imbalance_stat(&env, sd, idle);
>>       env.src_cpu = busiest->cpu;
>>       env.src_rq = busiest;
>> diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
>> index eb0cdcd4d921..802bd9398a2e 100644
>> --- a/kernel/sched/stats.c
>> +++ b/kernel/sched/stats.c
>> @@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq, 
>> void *v)
>>               seq_printf(seq, "domain%d %*pb", dcount++,
>>                      cpumask_pr_args(sched_domain_span(sd)));
>>               for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
>> -                seq_printf(seq, " %u %u %u %u %u %u %u %u",
>> +                seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
>>                       sd->lb_count[itype],
>>                       sd->lb_balanced[itype],
>>                       sd->lb_failed[itype],
>> -                    sd->lb_imbalance[itype],
>> +                    sd->lb_imbalance_load[itype],
>> +                    sd->lb_imbalance_util[itype],
>> +                    sd->lb_imbalance_task[itype],
>> +                    sd->lb_imbalance_misfit[itype],
>>                       sd->lb_gained[itype],
>>                       sd->lb_hot_gained[itype],
>>                       sd->lb_nobusyq[itype],
> 
> While you are adding this, please update the
> Documentation/scheduler/sched-stats.rst as well.

Please ignore this comment.

[tip: sched/core] sched: Report the different kinds of imbalances in /proc/schedstat
Posted by tip-bot2 for Swapnil Sapkal 1 year, 1 month ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     3b2a793ea70fd14136b442df31e53935e8095034
Gitweb:        https://git.kernel.org/tip/3b2a793ea70fd14136b442df31e53935e8095034
Author:        Swapnil Sapkal <swapnil.sapkal@amd.com>
AuthorDate:    Fri, 20 Dec 2024 06:32:21 
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 20 Dec 2024 15:31:17 +01:00

sched: Report the different kinds of imbalances in /proc/schedstat

In /proc/schedstat, lb_imbalance reports the sum of imbalances
discovered in sched domains with each call to sched_balance_rq(), which is
not very useful because lb_imbalance does not mention whether the imbalance
is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
from /proc/schedstat.

Currently there is no field in /proc/schedstat to report different types
of imbalances. Introduce new fields in /proc/schedstat to report the
total imbalances in load, utilization, nr_tasks or misfit_tasks.

Added fields to /proc/schedstat:
        - lb_imbalance_load: Total imbalance due to load.
        - lb_imbalance_util: Total imbalance due to utilization.
        - lb_imbalance_task: Total imbalance due to number of tasks.
        - lb_imbalance_misfit: Total imbalance due to misfit tasks.

Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Link: https://lore.kernel.org/r/20241220063224.17767-4-swapnil.sapkal@amd.com
---
 include/linux/sched/topology.h |  5 ++++-
 kernel/sched/fair.c            | 24 +++++++++++++++++++++++-
 kernel/sched/stats.c           |  7 +++++--
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 4237daa..76a662e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -114,7 +114,10 @@ struct sched_domain {
 	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
-	unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
+	unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
 	unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e5c0c61..b3418b5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11705,6 +11705,28 @@ static int should_we_balance(struct lb_env *env)
 	return group_balance_cpu(sg) == env->dst_cpu;
 }
 
+static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
+				     enum cpu_idle_type idle)
+{
+	if (!schedstat_enabled())
+		return;
+
+	switch (env->migration_type) {
+	case migrate_load:
+		__schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
+		break;
+	case migrate_util:
+		__schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
+		break;
+	case migrate_task:
+		__schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
+		break;
+	case migrate_misfit:
+		__schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
+		break;
+	}
+}
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
@@ -11755,7 +11777,7 @@ redo:
 
 	WARN_ON_ONCE(busiest == env.dst_rq);
 
-	schedstat_add(sd->lb_imbalance[idle], env.imbalance);
+	update_lb_imbalance_stat(&env, sd, idle);
 
 	env.src_cpu = busiest->cpu;
 	env.src_rq = busiest;
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index eb0cdcd..802bd93 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq, void *v)
 			seq_printf(seq, "domain%d %*pb", dcount++,
 				   cpumask_pr_args(sched_domain_span(sd)));
 			for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
-				seq_printf(seq, " %u %u %u %u %u %u %u %u",
+				seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
 				    sd->lb_count[itype],
 				    sd->lb_balanced[itype],
 				    sd->lb_failed[itype],
-				    sd->lb_imbalance[itype],
+				    sd->lb_imbalance_load[itype],
+				    sd->lb_imbalance_util[itype],
+				    sd->lb_imbalance_task[itype],
+				    sd->lb_imbalance_misfit[itype],
 				    sd->lb_gained[itype],
 				    sd->lb_hot_gained[itype],
 				    sd->lb_nobusyq[itype],