kernel/sched/stats.c | 86 ++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 34 deletions(-)
Function seq_printf supports a rich format string for printing decimals,
but there is no need for it in /proc/schedstat, since the majority of the
data is space-separated decimals. Use seq_put_decimal_ull instead as a
faster alternative.
Performance counter stats (truncated) for sh -c 'cat /proc/schedstat >
/dev/null' before and after applying the patch, from a machine with 72 CPUs,
are below.
Before:
2.94 msec task-clock # 0.820 CPUs utilized
1 context-switches # 340.551 /sec
0 cpu-migrations # 0.000 /sec
340 page-faults # 115.787 K/sec
10,327,200 instructions # 1.89 insn per cycle
# 0.10 stalled cycles per insn
5,458,307 cycles # 1.859 GHz
1,052,733 stalled-cycles-frontend # 19.29% frontend cycles idle
2,066,321 branches # 703.687 M/sec
25,621 branch-misses # 1.24% of all branches
0.00357974 +- 0.00000209 seconds time elapsed ( +- 0.06% )
After:
2.50 msec task-clock # 0.785 CPUs utilized
1 context-switches # 399.780 /sec
0 cpu-migrations # 0.000 /sec
340 page-faults # 135.925 K/sec
7,371,867 instructions # 1.59 insn per cycle
# 0.13 stalled cycles per insn
4,647,053 cycles # 1.858 GHz
986,487 stalled-cycles-frontend # 21.23% frontend cycles idle
1,591,374 branches # 636.199 M/sec
28,973 branch-misses # 1.82% of all branches
0.00318461 +- 0.00000295 seconds time elapsed ( +- 0.09% )
This is an ~11% (relative) improvement in time elapsed.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
kernel/sched/stats.c | 86 ++++++++++++++++++++++++++------------------
1 file changed, 52 insertions(+), 34 deletions(-)
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index d1c9429a4ac5..b304f821e8ff 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -98,6 +98,56 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
}
}
+static void show_runqueue_stats(struct seq_file *seq, int cpu, struct rq *rq)
+{
+ seq_printf(seq, "cpu%d", cpu);
+ seq_put_decimal_ull(seq, " ", rq->yld_count);
+ seq_put_decimal_ull(seq, " ", 0);
+ seq_put_decimal_ull(seq, " ", rq->sched_count);
+ seq_put_decimal_ull(seq, " ", rq->sched_goidle);
+ seq_put_decimal_ull(seq, " ", rq->ttwu_count);
+ seq_put_decimal_ull(seq, " ", rq->ttwu_local);
+ seq_put_decimal_ull(seq, " ", rq->rq_cpu_time);
+ seq_put_decimal_ull(seq, " ", rq->rq_sched_info.run_delay);
+ seq_put_decimal_ull(seq, " ", rq->rq_sched_info.pcount);
+ seq_putc(seq, '\n');
+}
+
+static void show_domain_stats(struct seq_file *seq, int dcount,
+ struct sched_domain *sd)
+{
+ enum cpu_idle_type itype;
+
+ seq_printf(seq, "domain%d %s %*pb", dcount, sd->name,
+ cpumask_pr_args(sched_domain_span(sd)));
+ for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
+ seq_put_decimal_ull(seq, " ", sd->lb_count[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_balanced[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_failed[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_imbalance_load[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_imbalance_util[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_imbalance_task[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_imbalance_misfit[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_gained[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_hot_gained[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_nobusyq[itype]);
+ seq_put_decimal_ull(seq, " ", sd->lb_nobusyg[itype]);
+ }
+ seq_put_decimal_ull(seq, " ", sd->alb_count);
+ seq_put_decimal_ull(seq, " ", sd->alb_failed);
+ seq_put_decimal_ull(seq, " ", sd->alb_pushed);
+ seq_put_decimal_ull(seq, " ", sd->sbe_count);
+ seq_put_decimal_ull(seq, " ", sd->sbe_balanced);
+ seq_put_decimal_ull(seq, " ", sd->sbe_pushed);
+ seq_put_decimal_ull(seq, " ", sd->sbf_count);
+ seq_put_decimal_ull(seq, " ", sd->sbf_balanced);
+ seq_put_decimal_ull(seq, " ", sd->sbf_pushed);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_wake_remote);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_move_affine);
+ seq_put_decimal_ull(seq, " ", sd->ttwu_move_balance);
+ seq_putc(seq, '\n');
+}
+
/*
* Current schedstat API version.
*
@@ -121,44 +171,12 @@ static int show_schedstat(struct seq_file *seq, void *v)
rq = cpu_rq(cpu);
/* runqueue-specific stats */
- seq_printf(seq,
- "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
- cpu, rq->yld_count,
- rq->sched_count, rq->sched_goidle,
- rq->ttwu_count, rq->ttwu_local,
- rq->rq_cpu_time,
- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
-
- seq_printf(seq, "\n");
+ show_runqueue_stats(seq, cpu, rq);
/* domain-specific stats */
rcu_read_lock();
for_each_domain(cpu, sd) {
- enum cpu_idle_type itype;
-
- seq_printf(seq, "domain%d %s %*pb", dcount++, sd->name,
- cpumask_pr_args(sched_domain_span(sd)));
- for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) {
- seq_printf(seq, " %u %u %u %u %u %u %u %u %u %u %u",
- sd->lb_count[itype],
- sd->lb_balanced[itype],
- sd->lb_failed[itype],
- sd->lb_imbalance_load[itype],
- sd->lb_imbalance_util[itype],
- sd->lb_imbalance_task[itype],
- sd->lb_imbalance_misfit[itype],
- sd->lb_gained[itype],
- sd->lb_hot_gained[itype],
- sd->lb_nobusyq[itype],
- sd->lb_nobusyg[itype]);
- }
- seq_printf(seq,
- " %u %u %u %u %u %u %u %u %u %u %u %u\n",
- sd->alb_count, sd->alb_failed, sd->alb_pushed,
- sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
- sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
- sd->ttwu_wake_remote, sd->ttwu_move_affine,
- sd->ttwu_move_balance);
+ show_domain_stats(seq, dcount++, sd);
}
rcu_read_unlock();
}
--
2.47.3
© 2016 - 2025 Red Hat, Inc.