In /proc/schedstat, lb_imbalance reports the sum of imbalances
discovered in sched domains with each call to sched_balance_rq(), which is
not very useful because lb_imbalance does not mention whether the imbalance
is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
from /proc/schedstat.
Currently there is no field in /proc/schedstat to report different types
of imbalances. Introduce new fields in /proc/schedstat to report the
total imbalances in load, utilization, nr_tasks or misfit_tasks.
Added fields to /proc/schedstat:
- lb_imbalance_load: Total imbalance due to load.
- lb_imbalance_util: Total imbalance due to utilization.
- lb_imbalance_task: Total imbalance due to number of tasks.
- lb_imbalance_misfit: Total imbalance due to misfit tasks.
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
---
include/linux/sched/topology.h | 5 ++++-
kernel/sched/fair.c | 24 +++++++++++++++++++++++-
kernel/sched/stats.c | 7 +++++--
3 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 4237daa5ac7a..76a662e1ec24 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -114,7 +114,10 @@ struct sched_domain {
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
- unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ca3f098552c..5e7e4fe81648 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11699,6 +11699,28 @@ static int should_we_balance(struct lb_env *env)
return group_balance_cpu(sg) == env->dst_cpu;
}
+static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
+ enum cpu_idle_type idle)
+{
+ if (!schedstat_enabled())
+ return;
+
+ switch (env->migration_type) {
+ case migrate_load:
+ __schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
+ break;
+ case migrate_util:
+ __schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
+ break;
+ case migrate_task:
+ __schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
+ break;
+ case migrate_misfit:
+ __schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
+ break;
+ }
+}
+
/*
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
@@ -11749,7 +11771,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
WARN_ON_ONCE(busiest == env.dst_rq);
- schedstat_add(sd->lb_imbalance[idle], env.imbalance);
+ update_lb_imbalance_stat(&env, sd, idle);
env.src_cpu = busiest->cpu;
env.src_rq = busiest;
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index eb0cdcd4d921..802bd9398a2e 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq, void *v)
seq_printf(seq, "domain%d %*pb", dcount++,
cpumask_pr_args(sched_domain_span(sd)));
for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
- seq_printf(seq, " %u %u %u %u %u %u %u %u",
+ seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
sd->lb_count[itype],
sd->lb_balanced[itype],
sd->lb_failed[itype],
- sd->lb_imbalance[itype],
+ sd->lb_imbalance_load[itype],
+ sd->lb_imbalance_util[itype],
+ sd->lb_imbalance_task[itype],
+ sd->lb_imbalance_misfit[itype],
sd->lb_gained[itype],
sd->lb_hot_gained[itype],
sd->lb_nobusyq[itype],
--
2.43.0
On 12/20/24 12:02, Swapnil Sapkal wrote:
> In /proc/schedstat, lb_imbalance reports the sum of imbalances
> discovered in sched domains with each call to sched_balance_rq(), which is
> not very useful because lb_imbalance does not mention whether the imbalance
> is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
> from /proc/schedstat.
>
> Currently there is no field in /proc/schedstat to report different types
> of imbalances. Introduce new fields in /proc/schedstat to report the
> total imbalances in load, utilization, nr_tasks or misfit_tasks.
>
> Added fields to /proc/schedstat:
> - lb_imbalance_load: Total imbalance due to load.
> - lb_imbalance_util: Total imbalance due to utilization.
> - lb_imbalance_task: Total imbalance due to number of tasks.
> - lb_imbalance_misfit: Total imbalance due to misfit tasks.
>
> Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
I think it's better to merge patch 3 and patch 6.
> ---
> include/linux/sched/topology.h | 5 ++++-
> kernel/sched/fair.c | 24 +++++++++++++++++++++++-
> kernel/sched/stats.c | 7 +++++--
> 3 files changed, 32 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
> index 4237daa5ac7a..76a662e1ec24 100644
> --- a/include/linux/sched/topology.h
> +++ b/include/linux/sched/topology.h
> @@ -114,7 +114,10 @@ struct sched_domain {
> unsigned int lb_count[CPU_MAX_IDLE_TYPES];
> unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
> unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
> - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
> + unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
> + unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
> + unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
> + unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
> unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
> unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
> unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2ca3f098552c..5e7e4fe81648 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -11699,6 +11699,28 @@ static int should_we_balance(struct lb_env *env)
> return group_balance_cpu(sg) == env->dst_cpu;
> }
>
> +static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
> + enum cpu_idle_type idle)
> +{
> + if (!schedstat_enabled())
> + return;
> +
> + switch (env->migration_type) {
> + case migrate_load:
> + __schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
> + break;
> + case migrate_util:
> + __schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
> + break;
> + case migrate_task:
> + __schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
> + break;
> + case migrate_misfit:
> + __schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
> + break;
> + }
> +}
> +
> /*
> * Check this_cpu to ensure it is balanced within domain. Attempt to move
> * tasks if there is an imbalance.
> @@ -11749,7 +11771,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
>
> WARN_ON_ONCE(busiest == env.dst_rq);
>
> - schedstat_add(sd->lb_imbalance[idle], env.imbalance);
> + update_lb_imbalance_stat(&env, sd, idle);
>
> env.src_cpu = busiest->cpu;
> env.src_rq = busiest;
> diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
> index eb0cdcd4d921..802bd9398a2e 100644
> --- a/kernel/sched/stats.c
> +++ b/kernel/sched/stats.c
> @@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq, void *v)
> seq_printf(seq, "domain%d %*pb", dcount++,
> cpumask_pr_args(sched_domain_span(sd)));
> for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
> - seq_printf(seq, " %u %u %u %u %u %u %u %u",
> + seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
> sd->lb_count[itype],
> sd->lb_balanced[itype],
> sd->lb_failed[itype],
> - sd->lb_imbalance[itype],
> + sd->lb_imbalance_load[itype],
> + sd->lb_imbalance_util[itype],
> + sd->lb_imbalance_task[itype],
> + sd->lb_imbalance_misfit[itype],
> sd->lb_gained[itype],
> sd->lb_hot_gained[itype],
> sd->lb_nobusyq[itype],
While you are adding this, please update
Documentation/scheduler/sched-stats.rst as well.
On 12/20/24 23:53, Shrikanth Hegde wrote:
>
>
> On 12/20/24 12:02, Swapnil Sapkal wrote:
>> In /proc/schedstat, lb_imbalance reports the sum of imbalances
>> discovered in sched domains with each call to sched_balance_rq(),
>> which is
>> not very useful because lb_imbalance does not mention whether the
>> imbalance
>> is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
>> from /proc/schedstat.
>>
>> Currently there is no field in /proc/schedstat to report different types
>> of imbalances. Introduce new fields in /proc/schedstat to report the
>> total imbalances in load, utilization, nr_tasks or misfit_tasks.
>>
>> Added fields to /proc/schedstat:
>> - lb_imbalance_load: Total imbalance due to load.
>> - lb_imbalance_util: Total imbalance due to utilization.
>> - lb_imbalance_task: Total imbalance due to number of tasks.
>> - lb_imbalance_misfit: Total imbalance due to misfit tasks.
>>
>> Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
>> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
>
> I think it's better to merge patch 3 and patch 6.
Please ignore this comment. Since there is a change in patch 5 that
affects the docs, it is better that patch 6 stays separate. Sorry for the noise.
>
>> ---
>> include/linux/sched/topology.h | 5 ++++-
>> kernel/sched/fair.c | 24 +++++++++++++++++++++++-
>> kernel/sched/stats.c | 7 +++++--
>> 3 files changed, 32 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/sched/topology.h b/include/linux/sched/
>> topology.h
>> index 4237daa5ac7a..76a662e1ec24 100644
>> --- a/include/linux/sched/topology.h
>> +++ b/include/linux/sched/topology.h
>> @@ -114,7 +114,10 @@ struct sched_domain {
>> unsigned int lb_count[CPU_MAX_IDLE_TYPES];
>> unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
>> unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
>> - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
>> + unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
>> + unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
>> + unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
>> + unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
>> unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
>> unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
>> unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 2ca3f098552c..5e7e4fe81648 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -11699,6 +11699,28 @@ static int should_we_balance(struct lb_env *env)
>> return group_balance_cpu(sg) == env->dst_cpu;
>> }
>> +static void update_lb_imbalance_stat(struct lb_env *env, struct
>> sched_domain *sd,
>> + enum cpu_idle_type idle)
>> +{
>> + if (!schedstat_enabled())
>> + return;
>> +
>> + switch (env->migration_type) {
>> + case migrate_load:
>> + __schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
>> + break;
>> + case migrate_util:
>> + __schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
>> + break;
>> + case migrate_task:
>> + __schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
>> + break;
>> + case migrate_misfit:
>> + __schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
>> + break;
>> + }
>> +}
>> +
>> /*
>> * Check this_cpu to ensure it is balanced within domain. Attempt to
>> move
>> * tasks if there is an imbalance.
>> @@ -11749,7 +11771,7 @@ static int sched_balance_rq(int this_cpu,
>> struct rq *this_rq,
>> WARN_ON_ONCE(busiest == env.dst_rq);
>> - schedstat_add(sd->lb_imbalance[idle], env.imbalance);
>> + update_lb_imbalance_stat(&env, sd, idle);
>> env.src_cpu = busiest->cpu;
>> env.src_rq = busiest;
>> diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
>> index eb0cdcd4d921..802bd9398a2e 100644
>> --- a/kernel/sched/stats.c
>> +++ b/kernel/sched/stats.c
>> @@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq,
>> void *v)
>> seq_printf(seq, "domain%d %*pb", dcount++,
>> cpumask_pr_args(sched_domain_span(sd)));
>> for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
>> - seq_printf(seq, " %u %u %u %u %u %u %u %u",
>> + seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
>> sd->lb_count[itype],
>> sd->lb_balanced[itype],
>> sd->lb_failed[itype],
>> - sd->lb_imbalance[itype],
>> + sd->lb_imbalance_load[itype],
>> + sd->lb_imbalance_util[itype],
>> + sd->lb_imbalance_task[itype],
>> + sd->lb_imbalance_misfit[itype],
>> sd->lb_gained[itype],
>> sd->lb_hot_gained[itype],
>> sd->lb_nobusyq[itype],
>
> While you are adding this, please update the Documentation/scheduler/
> sched-stats.rst as well.
Please ignore this comment.
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 3b2a793ea70fd14136b442df31e53935e8095034
Gitweb: https://git.kernel.org/tip/3b2a793ea70fd14136b442df31e53935e8095034
Author: Swapnil Sapkal <swapnil.sapkal@amd.com>
AuthorDate: Fri, 20 Dec 2024 06:32:21
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 20 Dec 2024 15:31:17 +01:00
sched: Report the different kinds of imbalances in /proc/schedstat
In /proc/schedstat, lb_imbalance reports the sum of imbalances
discovered in sched domains with each call to sched_balance_rq(), which is
not very useful because lb_imbalance does not mention whether the imbalance
is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
from /proc/schedstat.
Currently there is no field in /proc/schedstat to report different types
of imbalances. Introduce new fields in /proc/schedstat to report the
total imbalances in load, utilization, nr_tasks or misfit_tasks.
Added fields to /proc/schedstat:
- lb_imbalance_load: Total imbalance due to load.
- lb_imbalance_util: Total imbalance due to utilization.
- lb_imbalance_task: Total imbalance due to number of tasks.
- lb_imbalance_misfit: Total imbalance due to misfit tasks.
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Link: https://lore.kernel.org/r/20241220063224.17767-4-swapnil.sapkal@amd.com
---
include/linux/sched/topology.h | 5 ++++-
kernel/sched/fair.c | 24 +++++++++++++++++++++++-
kernel/sched/stats.c | 7 +++++--
3 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 4237daa..76a662e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -114,7 +114,10 @@ struct sched_domain {
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
- unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e5c0c61..b3418b5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11705,6 +11705,28 @@ static int should_we_balance(struct lb_env *env)
return group_balance_cpu(sg) == env->dst_cpu;
}
+static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
+ enum cpu_idle_type idle)
+{
+ if (!schedstat_enabled())
+ return;
+
+ switch (env->migration_type) {
+ case migrate_load:
+ __schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
+ break;
+ case migrate_util:
+ __schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
+ break;
+ case migrate_task:
+ __schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
+ break;
+ case migrate_misfit:
+ __schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
+ break;
+ }
+}
+
/*
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
@@ -11755,7 +11777,7 @@ redo:
WARN_ON_ONCE(busiest == env.dst_rq);
- schedstat_add(sd->lb_imbalance[idle], env.imbalance);
+ update_lb_imbalance_stat(&env, sd, idle);
env.src_cpu = busiest->cpu;
env.src_rq = busiest;
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index eb0cdcd..802bd93 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq, void *v)
seq_printf(seq, "domain%d %*pb", dcount++,
cpumask_pr_args(sched_domain_span(sd)));
for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
- seq_printf(seq, " %u %u %u %u %u %u %u %u",
+ seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
sd->lb_count[itype],
sd->lb_balanced[itype],
sd->lb_failed[itype],
- sd->lb_imbalance[itype],
+ sd->lb_imbalance_load[itype],
+ sd->lb_imbalance_util[itype],
+ sd->lb_imbalance_task[itype],
+ sd->lb_imbalance_misfit[itype],
sd->lb_gained[itype],
sd->lb_hot_gained[itype],
sd->lb_nobusyq[itype],
© 2016 - 2026 Red Hat, Inc.