From: Joshua Hahn <joshua.hahn6@gmail.com>
Cgroup-level CPU statistics currently include time spent on
user/system processes, but do not include niced CPU time (despite
already being tracked). This patch exposes niced CPU time to the
userspace, allowing users to get a better understanding of their
hardware limits and can facilitate more informed workload distribution.
A new field 'ntime' is added to struct cgroup_base_stat as opposed to
struct task_cputime to minimize footprint.
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
include/linux/cgroup-defs.h | 1 +
kernel/cgroup/rstat.c | 16 +++++++++++++---
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ae04035b6cbe..a2fcb3db6c52 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -315,6 +315,7 @@ struct cgroup_base_stat {
#ifdef CONFIG_SCHED_CORE
u64 forceidle_sum;
#endif
+ u64 ntime;
};
/*
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index a06b45272411..a77ba9a83bab 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -444,6 +444,7 @@ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
#ifdef CONFIG_SCHED_CORE
dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
#endif
+ dst_bstat->ntime += src_bstat->ntime;
}
static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
@@ -455,6 +456,7 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
#ifdef CONFIG_SCHED_CORE
dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
#endif
+ dst_bstat->ntime -= src_bstat->ntime;
}
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
@@ -535,7 +537,10 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
switch (index) {
case CPUTIME_USER:
+ rstatc->bstat.cputime.utime += delta_exec;
+ break;
case CPUTIME_NICE:
+ rstatc->bstat.ntime += delta_exec;
rstatc->bstat.cputime.utime += delta_exec;
break;
case CPUTIME_SYSTEM:
@@ -591,6 +596,7 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
#ifdef CONFIG_SCHED_CORE
bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
#endif
+ bstat->ntime += cpustat[CPUTIME_NICE];
}
}
@@ -608,13 +614,14 @@ static void cgroup_force_idle_show(struct seq_file *seq, struct cgroup_base_stat
void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
- u64 usage, utime, stime;
+ u64 usage, utime, stime, ntime;
if (cgroup_parent(cgrp)) {
cgroup_rstat_flush_hold(cgrp);
usage = cgrp->bstat.cputime.sum_exec_runtime;
cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
&utime, &stime);
+ ntime = cgrp->bstat.ntime;
cgroup_rstat_flush_release(cgrp);
} else {
/* cgrp->bstat of root is not actually used, reuse it */
@@ -622,16 +629,19 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
usage = cgrp->bstat.cputime.sum_exec_runtime;
utime = cgrp->bstat.cputime.utime;
stime = cgrp->bstat.cputime.stime;
+ ntime = cgrp->bstat.ntime;
}
do_div(usage, NSEC_PER_USEC);
do_div(utime, NSEC_PER_USEC);
do_div(stime, NSEC_PER_USEC);
+ do_div(ntime, NSEC_PER_USEC);
seq_printf(seq, "usage_usec %llu\n"
"user_usec %llu\n"
- "system_usec %llu\n",
- usage, utime, stime);
+ "system_usec %llu\n"
+ "nice_usec %llu\n",
+ usage, utime, stime, ntime);
cgroup_force_idle_show(seq, &cgrp->bstat);
}
--
2.43.5
On Mon, Sep 23, 2024 at 07:20:05AM GMT, Joshua Hahn <joshua.hahnjy@gmail.com> wrote: > static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) > @@ -535,7 +537,10 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp, > > switch (index) { > case CPUTIME_USER: > + rstatc->bstat.cputime.utime += delta_exec; > + break; > case CPUTIME_NICE: > + rstatc->bstat.ntime += delta_exec; > rstatc->bstat.cputime.utime += delta_exec; > break; Nit: slightly better diffstat is possible with fallthrough: rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); switch (index) { - case CPUTIME_USER: case CPUTIME_NICE: + rstatc->bstat.ntime += delta_exec; + fallthrough; + case CPUTIME_USER: rstatc->bstat.cputime.utime += delta_exec; break; case CPUTIME_SYSTEM: > @@ -622,16 +629,19 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq) ... > > seq_printf(seq, "usage_usec %llu\n" > "user_usec %llu\n" > - "system_usec %llu\n", > - usage, utime, stime); > + "system_usec %llu\n" > + "nice_usec %llu\n", > + usage, utime, stime, ntime); This seems to be different whitespace alignment than user_usec above. (Implementation looks good, I only have some remarks to the concept, reply to cover letter.) Michal
© 2016 - 2024 Red Hat, Inc.