In /proc/schedstat, lb_imbalance reports the sum of imbalances
discovered in sched domains with each call to sched_balance_rq(), which is
not very useful because lb_imbalance does not indicate whether the imbalance
is due to load, utilization, nr_tasks or misfit_tasks. Remove this field
from /proc/schedstat.
Currently there is no field in /proc/schedstat to report the different types
of imbalance. Introduce new fields in /proc/schedstat to report the
total imbalance in load, utilization, nr_tasks and misfit_tasks.
Added fields to /proc/schedstat:
- lb_imbalance_load: Total imbalance due to load.
- lb_imbalance_util: Total imbalance due to utilization.
- lb_imbalance_task: Total imbalance due to number of tasks.
- lb_imbalance_misfit: Total imbalance due to misfit tasks.
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
---
include/linux/sched/topology.h | 5 ++++-
kernel/sched/fair.c | 21 ++++++++++++++++++++-
kernel/sched/stats.c | 7 +++++--
3 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 4237daa5ac7a..76a662e1ec24 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -114,7 +114,10 @@ struct sched_domain {
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
- unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES];
unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ec403e81ffef..91f33cb9fb23 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11659,6 +11659,25 @@ static int should_we_balance(struct lb_env *env)
return group_balance_cpu(sg) == env->dst_cpu;
}
+static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
+ enum cpu_idle_type idle)
+{
+ switch (env->migration_type) {
+ case migrate_load:
+ schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
+ break;
+ case migrate_util:
+ schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
+ break;
+ case migrate_task:
+ schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
+ break;
+ case migrate_misfit:
+ schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
+ break;
+ }
+}
+
/*
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
@@ -11709,7 +11728,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
WARN_ON_ONCE(busiest == env.dst_rq);
- schedstat_add(sd->lb_imbalance[idle], env.imbalance);
+ update_lb_imbalance_stat(&env, sd, idle);
env.src_cpu = busiest->cpu;
env.src_rq = busiest;
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index eb0cdcd4d921..802bd9398a2e 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -141,11 +141,14 @@ static int show_schedstat(struct seq_file *seq, void *v)
seq_printf(seq, "domain%d %*pb", dcount++,
cpumask_pr_args(sched_domain_span(sd)));
for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
- seq_printf(seq, " %u %u %u %u %u %u %u %u",
+ seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
sd->lb_count[itype],
sd->lb_balanced[itype],
sd->lb_failed[itype],
- sd->lb_imbalance[itype],
+ sd->lb_imbalance_load[itype],
+ sd->lb_imbalance_util[itype],
+ sd->lb_imbalance_task[itype],
+ sd->lb_imbalance_misfit[itype],
sd->lb_gained[itype],
sd->lb_hot_gained[itype],
sd->lb_nobusyq[itype],
--
2.43.0
On Wed, Dec 18, 2024 at 04:36:26AM +0000, Swapnil Sapkal wrote:
> +static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
> + enum cpu_idle_type idle)
> +{
> + switch (env->migration_type) {
> + case migrate_load:
> + schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
> + break;
> + case migrate_util:
> + schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
> + break;
> + case migrate_task:
> + schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
> + break;
> + case migrate_misfit:
> + schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
> + break;
> + }
> +}
Can you please write that like:
if (!schedstat_enabled())
return;
switch () {
case ...
__schedstat_add();
}
It makes no sense to have 4 copies of schedstat_enabled() inside the
switch statement -- esp. since afaik the compilers aren't able to CSE
static keys :/
Hello Peter,
Thanks for the review.
On 12/18/2024 5:10 PM, Peter Zijlstra wrote:
> On Wed, Dec 18, 2024 at 04:36:26AM +0000, Swapnil Sapkal wrote:
>
>> +static void update_lb_imbalance_stat(struct lb_env *env, struct sched_domain *sd,
>> + enum cpu_idle_type idle)
>> +{
>> + switch (env->migration_type) {
>> + case migrate_load:
>> + schedstat_add(sd->lb_imbalance_load[idle], env->imbalance);
>> + break;
>> + case migrate_util:
>> + schedstat_add(sd->lb_imbalance_util[idle], env->imbalance);
>> + break;
>> + case migrate_task:
>> + schedstat_add(sd->lb_imbalance_task[idle], env->imbalance);
>> + break;
>> + case migrate_misfit:
>> + schedstat_add(sd->lb_imbalance_misfit[idle], env->imbalance);
>> + break;
>> + }
>> +}
>
>
> Can you please write that like:
>
> if (!schedstat_enabled())
> return;
>
> switch () {
> case ...
> __schedstat_add();
> }
>
> It makes no sense to have 4 copies of schedstat_enabled() inside the
> switch statement -- esp. since afaik the compilers aren't able to CSE
> static keys :/
This makes sense. I will update this change in v2.
--
Thanks and Regards,
Swapnil
© 2016 - 2026 Red Hat, Inc.