kernel/sched/fair.c | 101 +++++++++++++++++++++++-------------------- kernel/sched/sched.h | 3 +- 2 files changed, 56 insertions(+), 48 deletions(-)
The current code subtracts the value of curr from avg_vruntime and avg_load
during runtime. Then, every time avg_vruntime() is called, it adds the
value of curr back to avg_vruntime and avg_load. Afterward, it divides the
weighted vruntime sum by the total load and adds min_vruntime to obtain the
actual average vruntime.
Analysis of the code indicates that avg_vruntime only changes during
update_curr(), update_min_vruntime(), and when tasks are enqueued or
dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
only in these specific scenarios. This optimization ensures that calling
avg_vruntime() does not require a recalculation each time, thereby
enhancing the efficiency of the code.
There is no need to subtract curr's load from avg_load during runtime.
Instead, we only need to calculate the incremental change and update
avg_vruntime whenever curr's vruntime is updated.
To better represent their functions, rename the original avg_vruntime and
avg_load to tot_vruntime and tot_load, respectively, which more accurately
describes their roles in the computation.
Signed-off-by: Xavier <xavier_qy@163.com>
---
kernel/sched/fair.c | 101 +++++++++++++++++++++++--------------------
kernel/sched/sched.h | 3 +-
2 files changed, 56 insertions(+), 48 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9057584ec06d..308d4bc3f40d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Which we track using:
*
* v0 := cfs_rq->min_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- * \Sum w_i := cfs_rq->avg_load
+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
+ * \Sum w_i := cfs_rq->tot_load
*
* Since min_vruntime is a monotonic increasing variable that closely tracks
* the per-task service, these deltas: (v_i - v), will be in the order of the
@@ -617,14 +617,29 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* As measured, the max (key * weight) value was ~44 bits for a kernel build.
*/
+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
+{
+ s64 tot_vruntime = cfs_rq->tot_vruntime;
+
+ /* sign flips effective floor / ceiling */
+ if (cfs_rq->tot_load) {
+ if (tot_vruntime < 0)
+ tot_vruntime -= (cfs_rq->tot_load - 1);
+ cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
+ } else {
+ cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
+ }
+}
+
static void
avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime += key * weight;
- cfs_rq->avg_load += weight;
+ cfs_rq->tot_vruntime += key * weight;
+ cfs_rq->tot_load += weight;
+ avg_vruntime_update(cfs_rq);
}
static void
@@ -633,17 +648,29 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime -= key * weight;
- cfs_rq->avg_load -= weight;
+ cfs_rq->tot_vruntime -= key * weight;
+ cfs_rq->tot_load -= weight;
+ avg_vruntime_update(cfs_rq);
+}
+
+static inline
+void avg_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
+{
+ struct sched_entity *curr = cfs_rq->curr;
+ unsigned long weight = scale_load_down(curr->load.weight);
+
+ cfs_rq->tot_vruntime += delta * weight;
+ avg_vruntime_update(cfs_rq);
}
static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void avg_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+ * v' = v + d ==> avg_runtime' = tot_runtime - d*tot_load
*/
- cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+ cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
+ avg_vruntime_update(cfs_rq);
}
/*
@@ -652,25 +679,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
*/
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
- struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
-
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
-
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
-
- if (load) {
- /* sign flips effective floor / ceiling */
- if (avg < 0)
- avg -= (load - 1);
- avg = div_s64(avg, load);
- }
-
- return cfs_rq->min_vruntime + avg;
+ return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
}
/*
@@ -725,18 +734,10 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
- struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
+ s64 total = cfs_rq->tot_vruntime;
+ long load = cfs_rq->tot_load;
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
-
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
-
- return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+ return total >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
}
int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -752,7 +753,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
*/
s64 delta = (s64)(vruntime - min_vruntime);
if (delta > 0) {
- avg_vruntime_update(cfs_rq, delta);
+ avg_vruntime_update_for_minv(cfs_rq, delta);
min_vruntime = vruntime;
}
return min_vruntime;
@@ -822,7 +823,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
*/
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- avg_vruntime_add(cfs_rq, se);
se->min_vruntime = se->vruntime;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
__entity_less, &min_vruntime_cb);
@@ -832,7 +832,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
- avg_vruntime_sub(cfs_rq, se);
}
struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1157,6 +1156,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
s64 delta_exec;
+ s64 vdelta_exec;
if (unlikely(!curr))
return;
@@ -1165,8 +1165,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (unlikely(delta_exec <= 0))
return;
- curr->vruntime += calc_delta_fair(delta_exec, curr);
+ vdelta_exec = calc_delta_fair(delta_exec, curr);
+ curr->vruntime += vdelta_exec;
update_deadline(cfs_rq, curr);
+ avg_vruntime_update_for_curr(cfs_rq, vdelta_exec);
update_min_vruntime(cfs_rq);
if (entity_is_task(curr))
@@ -3794,6 +3796,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
avruntime = avg_vruntime(cfs_rq);
if (!curr)
__dequeue_entity(cfs_rq, se);
+
+ avg_vruntime_sub(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);
@@ -3824,6 +3828,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
if (!curr)
__enqueue_entity(cfs_rq, se);
+ avg_vruntime_add(cfs_rq, se);
+
/*
* The entity's vruntime has been adjusted, so let's check
* whether the rq-wide min_vruntime needs updated too. Since
@@ -5190,7 +5196,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* EEVDF: placement strategy #1 / #2
*/
if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
- struct sched_entity *curr = cfs_rq->curr;
unsigned long load;
lag = se->vlag;
@@ -5247,9 +5252,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* vl_i = (W + w_i)*vl'_i / W
*/
- load = cfs_rq->avg_load;
- if (curr && curr->on_rq)
- load += scale_load_down(curr->load.weight);
+ load = cfs_rq->tot_load;
lag *= load + scale_load_down(se->load.weight);
if (WARN_ON_ONCE(!load))
@@ -5327,6 +5330,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_stats_enqueue_fair(cfs_rq, se, flags);
if (!curr)
__enqueue_entity(cfs_rq, se);
+
+ avg_vruntime_add(cfs_rq, se);
se->on_rq = 1;
if (cfs_rq->nr_running == 1) {
@@ -5397,6 +5402,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_entity_lag(cfs_rq, se);
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
+
+ avg_vruntime_sub(cfs_rq, se);
se->on_rq = 0;
account_entity_dequeue(cfs_rq, se);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4c36cc680361..57f07c56ecda 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -596,8 +596,9 @@ struct cfs_rq {
unsigned int idle_nr_running; /* SCHED_IDLE */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
+ s64 tot_vruntime;
s64 avg_vruntime;
- u64 avg_load;
+ u64 tot_load;
u64 exec_clock;
u64 min_vruntime;
--
2.45.2
Hello Peter, Do you have any suggestions regarding the optimization patch I proposed? Or does anyone else have any thoughts? Thank you! At 2024-09-12 17:14:54, "Xavier" <xavier_qy@163.com> wrote: >The current code subtracts the value of curr from avg_vruntime and avg_load >during runtime. Then, every time avg_vruntime() is called, it adds the >value of curr to the avg_vruntime and avg_load. Afterward, it divides these >and adds min_vruntime to obtain the actual avg_vruntime. > >Analysis of the code indicates that avg_vruntime only changes significantly >during update_curr(), update_min_vruntime(), and when tasks are enqueued or >dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime >only in these specific scenarios. This optimization ensures that accessing >avg_vruntime() does not necessitate a recalculation each time, thereby >enhancing the efficiency of the code. > >There is no need to subtract curr’s load from avg_load during runtime. >Instead, we only need to calculate the incremental change and update >avg_vruntime whenever curr’s time is updated. > >To better represent their functions, rename the original avg_vruntime and >avg_load to tot_vruntime and tot_load, respectively, which more accurately >describes their roles in the computation. 
> >Signed-off-by: Xavier <xavier_qy@163.com> >--- > kernel/sched/fair.c | 101 +++++++++++++++++++++++-------------------- > kernel/sched/sched.h | 3 +- > 2 files changed, 56 insertions(+), 48 deletions(-) > >diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c >index 9057584ec06d..308d4bc3f40d 100644 >--- a/kernel/sched/fair.c >+++ b/kernel/sched/fair.c >@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) > * Which we track using: > * > * v0 := cfs_rq->min_vruntime >- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime >- * \Sum w_i := cfs_rq->avg_load >+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime >+ * \Sum w_i := cfs_rq->tot_load > * > * Since min_vruntime is a monotonic increasing variable that closely tracks > * the per-task service, these deltas: (v_i - v), will be in the order of the >@@ -617,14 +617,29 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) > * > * As measured, the max (key * weight) value was ~44 bits for a kernel build. 
> */ >+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq) >+{ >+ s64 tot_vruntime = cfs_rq->tot_vruntime; >+ >+ /* sign flips effective floor / ceiling */ >+ if (cfs_rq->tot_load) { >+ if (tot_vruntime < 0) >+ tot_vruntime -= (cfs_rq->tot_load - 1); >+ cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load); >+ } else { >+ cfs_rq->avg_vruntime = cfs_rq->tot_vruntime; >+ } >+} >+ > static void > avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se) > { > unsigned long weight = scale_load_down(se->load.weight); > s64 key = entity_key(cfs_rq, se); > >- cfs_rq->avg_vruntime += key * weight; >- cfs_rq->avg_load += weight; >+ cfs_rq->tot_vruntime += key * weight; >+ cfs_rq->tot_load += weight; >+ avg_vruntime_update(cfs_rq); > } > > static void >@@ -633,17 +648,29 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) > unsigned long weight = scale_load_down(se->load.weight); > s64 key = entity_key(cfs_rq, se); > >- cfs_rq->avg_vruntime -= key * weight; >- cfs_rq->avg_load -= weight; >+ cfs_rq->tot_vruntime -= key * weight; >+ cfs_rq->tot_load -= weight; >+ avg_vruntime_update(cfs_rq); >+} >+ >+static inline >+void avg_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta) >+{ >+ struct sched_entity *curr = cfs_rq->curr; >+ unsigned long weight = scale_load_down(curr->load.weight); >+ >+ cfs_rq->tot_vruntime += delta * weight; >+ avg_vruntime_update(cfs_rq); > } > > static inline >-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) >+void avg_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta) > { > /* >- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load >+ * v' = v + d ==> avg_runtime' = tot_runtime - d*tot_load > */ >- cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta; >+ cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta; >+ avg_vruntime_update(cfs_rq); > } > > /* >@@ -652,25 +679,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) > */ > u64 avg_vruntime(struct cfs_rq *cfs_rq) > { >- struct 
sched_entity *curr = cfs_rq->curr; >- s64 avg = cfs_rq->avg_vruntime; >- long load = cfs_rq->avg_load; >- >- if (curr && curr->on_rq) { >- unsigned long weight = scale_load_down(curr->load.weight); >- >- avg += entity_key(cfs_rq, curr) * weight; >- load += weight; >- } >- >- if (load) { >- /* sign flips effective floor / ceiling */ >- if (avg < 0) >- avg -= (load - 1); >- avg = div_s64(avg, load); >- } >- >- return cfs_rq->min_vruntime + avg; >+ return cfs_rq->min_vruntime + cfs_rq->avg_vruntime; > } > > /* >@@ -725,18 +734,10 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) > */ > static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) > { >- struct sched_entity *curr = cfs_rq->curr; >- s64 avg = cfs_rq->avg_vruntime; >- long load = cfs_rq->avg_load; >+ s64 total = cfs_rq->tot_vruntime; >+ long load = cfs_rq->tot_load; > >- if (curr && curr->on_rq) { >- unsigned long weight = scale_load_down(curr->load.weight); >- >- avg += entity_key(cfs_rq, curr) * weight; >- load += weight; >- } >- >- return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load; >+ return total >= (s64)(vruntime - cfs_rq->min_vruntime) * load; > } > > int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se) >@@ -752,7 +753,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime) > */ > s64 delta = (s64)(vruntime - min_vruntime); > if (delta > 0) { >- avg_vruntime_update(cfs_rq, delta); >+ avg_vruntime_update_for_minv(cfs_rq, delta); > min_vruntime = vruntime; > } > return min_vruntime; >@@ -822,7 +823,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity, > */ > static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) > { >- avg_vruntime_add(cfs_rq, se); > se->min_vruntime = se->vruntime; > rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, > __entity_less, &min_vruntime_cb); >@@ -832,7 +832,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) > { 
> rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, > &min_vruntime_cb); >- avg_vruntime_sub(cfs_rq, se); > } > > struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq) >@@ -1157,6 +1156,7 @@ static void update_curr(struct cfs_rq *cfs_rq) > { > struct sched_entity *curr = cfs_rq->curr; > s64 delta_exec; >+ s64 vdelta_exec; > > if (unlikely(!curr)) > return; >@@ -1165,8 +1165,10 @@ static void update_curr(struct cfs_rq *cfs_rq) > if (unlikely(delta_exec <= 0)) > return; > >- curr->vruntime += calc_delta_fair(delta_exec, curr); >+ vdelta_exec = calc_delta_fair(delta_exec, curr); >+ curr->vruntime += vdelta_exec; > update_deadline(cfs_rq, curr); >+ avg_vruntime_update_for_curr(cfs_rq, vdelta_exec); > update_min_vruntime(cfs_rq); > > if (entity_is_task(curr)) >@@ -3794,6 +3796,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, > avruntime = avg_vruntime(cfs_rq); > if (!curr) > __dequeue_entity(cfs_rq, se); >+ >+ avg_vruntime_sub(cfs_rq, se); > update_load_sub(&cfs_rq->load, se->load.weight); > } > dequeue_load_avg(cfs_rq, se); >@@ -3824,6 +3828,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, > if (!curr) > __enqueue_entity(cfs_rq, se); > >+ avg_vruntime_add(cfs_rq, se); >+ > /* > * The entity's vruntime has been adjusted, so let's check > * whether the rq-wide min_vruntime needs updated too. 
Since >@@ -5190,7 +5196,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) > * EEVDF: placement strategy #1 / #2 > */ > if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) { >- struct sched_entity *curr = cfs_rq->curr; > unsigned long load; > > lag = se->vlag; >@@ -5247,9 +5252,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) > * > * vl_i = (W + w_i)*vl'_i / W > */ >- load = cfs_rq->avg_load; >- if (curr && curr->on_rq) >- load += scale_load_down(curr->load.weight); >+ load = cfs_rq->tot_load; > > lag *= load + scale_load_down(se->load.weight); > if (WARN_ON_ONCE(!load)) >@@ -5327,6 +5330,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) > update_stats_enqueue_fair(cfs_rq, se, flags); > if (!curr) > __enqueue_entity(cfs_rq, se); >+ >+ avg_vruntime_add(cfs_rq, se); > se->on_rq = 1; > > if (cfs_rq->nr_running == 1) { >@@ -5397,6 +5402,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) > update_entity_lag(cfs_rq, se); > if (se != cfs_rq->curr) > __dequeue_entity(cfs_rq, se); >+ >+ avg_vruntime_sub(cfs_rq, se); > se->on_rq = 0; > account_entity_dequeue(cfs_rq, se); > >diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h >index 4c36cc680361..57f07c56ecda 100644 >--- a/kernel/sched/sched.h >+++ b/kernel/sched/sched.h >@@ -596,8 +596,9 @@ struct cfs_rq { > unsigned int idle_nr_running; /* SCHED_IDLE */ > unsigned int idle_h_nr_running; /* SCHED_IDLE */ > >+ s64 tot_vruntime; > s64 avg_vruntime; >- u64 avg_load; >+ u64 tot_load; > > u64 exec_clock; > u64 min_vruntime; >-- >2.45.2
The current code subtracts the value of curr from avg_vruntime and avg_load
during runtime. Then, every time avg_vruntime() is called, it adds the
value of curr back to avg_vruntime and avg_load. Afterward, it divides the
weighted vruntime sum by the total load and adds min_vruntime to obtain the
actual average vruntime.
Analysis of the code indicates that avg_vruntime only changes during
update_curr(), update_min_vruntime(), and when tasks are enqueued or
dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
only in these specific scenarios. This optimization ensures that calling
avg_vruntime() does not require a recalculation each time, thereby
enhancing the efficiency of the code.
There is no need to subtract curr's load from avg_load during runtime.
Instead, we only need to calculate the incremental change and update
avg_vruntime whenever curr's vruntime is updated.
To better represent their functions, rename the original avg_vruntime and
avg_load to tot_vruntime and tot_load, respectively, which more accurately
describes their roles in the computation.
Signed-off-by: Xavier <xavier_qy@163.com>
---
Note:
The patch V2 has been updated based on the latest sched/core branch.
kernel/sched/fair.c | 107 ++++++++++++++++++++++++-------------------
kernel/sched/sched.h | 3 +-
2 files changed, 61 insertions(+), 49 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a621210c9c..fb0434dd0a8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Which we track using:
*
* v0 := cfs_rq->min_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- * \Sum w_i := cfs_rq->avg_load
+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
+ * \Sum w_i := cfs_rq->tot_load
*
* Since min_vruntime is a monotonic increasing variable that closely tracks
* the per-task service, these deltas: (v_i - v), will be in the order of the
@@ -617,14 +617,29 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* As measured, the max (key * weight) value was ~44 bits for a kernel build.
*/
+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
+{
+ s64 tot_vruntime = cfs_rq->tot_vruntime;
+
+ /* sign flips effective floor / ceiling */
+ if (cfs_rq->tot_load) {
+ if (tot_vruntime < 0)
+ tot_vruntime -= (cfs_rq->tot_load - 1);
+ cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
+ } else {
+ cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
+ }
+}
+
static void
avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime += key * weight;
- cfs_rq->avg_load += weight;
+ cfs_rq->tot_vruntime += key * weight;
+ cfs_rq->tot_load += weight;
+ avg_vruntime_update(cfs_rq);
}
static void
@@ -633,17 +648,29 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime -= key * weight;
- cfs_rq->avg_load -= weight;
+ cfs_rq->tot_vruntime -= key * weight;
+ cfs_rq->tot_load -= weight;
+ avg_vruntime_update(cfs_rq);
+}
+
+static inline
+void avg_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
+{
+ struct sched_entity *curr = cfs_rq->curr;
+ unsigned long weight = scale_load_down(curr->load.weight);
+
+ cfs_rq->tot_vruntime += delta * weight;
+ avg_vruntime_update(cfs_rq);
}
static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void avg_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+ * v' = v + d ==> avg_runtime' = tot_runtime - d*tot_load
*/
- cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+ cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
+ avg_vruntime_update(cfs_rq);
}
/*
@@ -652,25 +679,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
*/
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
- struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
-
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
-
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
-
- if (load) {
- /* sign flips effective floor / ceiling */
- if (avg < 0)
- avg -= (load - 1);
- avg = div_s64(avg, load);
- }
-
- return cfs_rq->min_vruntime + avg;
+ return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
}
/*
@@ -725,18 +734,8 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
- struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
-
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
-
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
-
- return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+ return cfs_rq->tot_vruntime >=
+ (s64)(vruntime - cfs_rq->min_vruntime) * (s64)cfs_rq->tot_load;
}
int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -752,7 +751,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
*/
s64 delta = (s64)(vruntime - min_vruntime);
if (delta > 0) {
- avg_vruntime_update(cfs_rq, delta);
+ avg_vruntime_update_for_minv(cfs_rq, delta);
min_vruntime = vruntime;
}
return min_vruntime;
@@ -851,7 +850,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
*/
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- avg_vruntime_add(cfs_rq, se);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -862,7 +860,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
- avg_vruntime_sub(cfs_rq, se);
}
struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1219,6 +1216,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
struct rq *rq = rq_of(cfs_rq);
s64 delta_exec;
bool resched;
+ s64 vdelta_exec;
if (unlikely(!curr))
return;
@@ -1227,8 +1225,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (unlikely(delta_exec <= 0))
return;
- curr->vruntime += calc_delta_fair(delta_exec, curr);
+ vdelta_exec = calc_delta_fair(delta_exec, curr);
+ curr->vruntime += vdelta_exec;
resched = update_deadline(cfs_rq, curr);
+ avg_vruntime_update_for_curr(cfs_rq, vdelta_exec);
update_min_vruntime(cfs_rq);
if (entity_is_task(curr)) {
@@ -3883,6 +3883,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
avruntime = avg_vruntime(cfs_rq);
if (!curr)
__dequeue_entity(cfs_rq, se);
+
+ avg_vruntime_sub(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);
@@ -3913,6 +3915,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
if (!curr)
__enqueue_entity(cfs_rq, se);
+ avg_vruntime_add(cfs_rq, se);
+
/*
* The entity's vruntime has been adjusted, so let's check
* whether the rq-wide min_vruntime needs updated too. Since
@@ -5281,7 +5285,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* EEVDF: placement strategy #1 / #2
*/
if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
- struct sched_entity *curr = cfs_rq->curr;
unsigned long load;
lag = se->vlag;
@@ -5338,9 +5341,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* vl_i = (W + w_i)*vl'_i / W
*/
- load = cfs_rq->avg_load;
- if (curr && curr->on_rq)
- load += scale_load_down(curr->load.weight);
+ load = cfs_rq->tot_load;
lag *= load + scale_load_down(se->load.weight);
if (WARN_ON_ONCE(!load))
@@ -5427,6 +5428,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_stats_enqueue_fair(cfs_rq, se, flags);
if (!curr)
__enqueue_entity(cfs_rq, se);
+
+ avg_vruntime_add(cfs_rq, se);
se->on_rq = 1;
if (cfs_rq->nr_running == 1) {
@@ -5530,6 +5533,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
+
+ avg_vruntime_sub(cfs_rq, se);
se->on_rq = 0;
account_entity_dequeue(cfs_rq, se);
@@ -6924,11 +6929,17 @@ requeue_delayed_entity(struct sched_entity *se)
cfs_rq->nr_running--;
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
+ avg_vruntime_sub(cfs_rq, se);
+
se->vlag = 0;
place_entity(cfs_rq, se, 0);
+
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se);
+ avg_vruntime_add(cfs_rq, se);
cfs_rq->nr_running++;
+
+ update_min_vruntime(cfs_rq);
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b1c3588a8f0..7f7c93518c7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,8 +650,9 @@ struct cfs_rq {
unsigned int idle_nr_running; /* SCHED_IDLE */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
+ s64 tot_vruntime;
s64 avg_vruntime;
- u64 avg_load;
+ u64 tot_load;
u64 min_vruntime;
#ifdef CONFIG_SCHED_CORE
--
2.45.2
Hello, kernel test robot noticed a -13.0% regression of will-it-scale.per_thread_ops on: commit: 538d813df3945cbc9d6a90ba224f36c78c8bb128 ("[PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime") url: https://github.com/intel-lab-lkp/linux/commits/Xavier/sched-eevdf-Reduce-the-computation-frequency-of-avg_vruntime/20241011-142820 base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git 7266f0a6d3bb73f42ea06656d3cc48c7d0386f71 patch link: https://lore.kernel.org/all/20241011062449.998696-1-xavier_qy@163.com/ patch subject: [PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime testcase: will-it-scale config: x86_64-rhel-8.3 compiler: gcc-12 test machine: 224 threads 4 sockets Intel(R) Xeon(R) Platinum 8380H CPU @ 2.90GHz (Cooper Lake) with 192G memory parameters: nr_task: 100% mode: thread test: sched_yield cpufreq_governor: performance In addition to that, the commit also has significant impact on the following tests: +------------------+--------------------------------------------------------------------------------------------+ | testcase: change | aim7: aim7.jobs-per-min -16.4% regression | | test machine | 128 threads 2 sockets Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz (Ice Lake) with 256G memory | | test parameters | cpufreq_governor=performance | | | disk=1BRD_48G | | | fs=xfs | | | load=3000 | | | test=disk_rr | +------------------+--------------------------------------------------------------------------------------------+ | testcase: change | will-it-scale: will-it-scale.per_thread_ops -4.1% regression | | test machine | 104 threads 2 sockets (Skylake) with 192G memory | | test parameters | cpufreq_governor=performance | | | mode=thread | | | nr_task=100% | | | test=sched_yield | +------------------+--------------------------------------------------------------------------------------------+ If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202410171258.5873933a-oliver.sang@intel.com Details are as below: --------------------------------------------------------------------------------------------------> The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20241017/202410171258.5873933a-oliver.sang@intel.com ========================================================================================= compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase: gcc-12/performance/x86_64-rhel-8.3/thread/100%/debian-12-x86_64-20240206.cgz/lkp-cpl-4sp2/sched_yield/will-it-scale commit: 7266f0a6d3 ("fs/bcachefs: Fix __wait_on_freeing_inode() definition of waitqueue entry") 538d813df3 ("sched/eevdf: Reduce the computation frequency of avg_vruntime") 7266f0a6d3bb73f4 538d813df3945cbc9d6a90ba224 ---------------- --------------------------- %stddev %change %stddev \ | \ 10.51 -1.4 9.07 mpstat.cpu.all.usr% 77573 ± 7% +42.8% 110799 ± 31% sched_debug.cpu.avg_idle.stddev 5.853e+08 -13.0% 5.092e+08 will-it-scale.224.threads 2612822 -13.0% 2273147 will-it-scale.per_thread_ops 5.853e+08 -13.0% 5.092e+08 will-it-scale.workload 0.02 ± 46% -44.2% 0.01 ± 6% perf-sched.sch_delay.avg.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 2.37 ±190% -95.5% 0.11 ± 19% perf-sched.sch_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 1.10 ± 47% +107.4% 2.29 ± 45% perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown] 25.62 ±203% +590.0% 176.73 ± 90% perf-sched.wait_time.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64 1.692e+11 -10.9% 1.509e+11 perf-stat.i.branch-instructions 1.759e+08 -10.1% 1.582e+08 perf-stat.i.branch-misses 1.13 +10.3% 1.24 ± 2% perf-stat.i.cpi 6.786e+11 -9.2% 6.164e+11 perf-stat.i.instructions 
0.90 -9.3% 0.82 perf-stat.i.ipc 1.11 +10.4% 1.22 perf-stat.overall.cpi 0.90 -9.4% 0.82 perf-stat.overall.ipc 350712 +4.5% 366647 perf-stat.overall.path-length 1.686e+11 -10.9% 1.502e+11 perf-stat.ps.branch-instructions 1.754e+08 -10.0% 1.579e+08 ± 2% perf-stat.ps.branch-misses 6.761e+11 -9.2% 6.14e+11 perf-stat.ps.instructions 2.053e+14 -9.1% 1.867e+14 perf-stat.total.instructions 3.82 -2.2 1.60 ± 2% perf-profile.calltrace.cycles-pp.pick_eevdf.pick_task_fair.pick_next_task_fair.__schedule.schedule 13.75 -1.6 12.18 perf-profile.calltrace.cycles-pp.clear_bhb_loop.__sched_yield 10.86 -1.3 9.52 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.__sched_yield 7.48 -1.0 6.45 perf-profile.calltrace.cycles-pp.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 1.32 ± 22% -0.7 0.60 ± 10% perf-profile.calltrace.cycles-pp.testcase 5.09 -0.5 4.54 perf-profile.calltrace.cycles-pp.update_rq_clock.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64 2.22 ± 7% -0.5 1.69 ± 14% perf-profile.calltrace.cycles-pp.perf_trace_sched_stat_runtime.update_curr.pick_task_fair.pick_next_task_fair.__schedule 3.74 -0.5 3.23 perf-profile.calltrace.cycles-pp.update_rq_clock_task.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64 4.00 -0.4 3.56 perf-profile.calltrace.cycles-pp.sched_clock_cpu.update_rq_clock.__schedule.schedule.__x64_sys_sched_yield 3.50 -0.4 3.13 perf-profile.calltrace.cycles-pp.sched_clock.sched_clock_cpu.update_rq_clock.__schedule.schedule 3.31 -0.4 2.95 perf-profile.calltrace.cycles-pp.native_sched_clock.sched_clock.sched_clock_cpu.update_rq_clock.__schedule 2.78 -0.4 2.43 perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 2.12 -0.3 1.82 perf-profile.calltrace.cycles-pp.raw_spin_rq_lock_nested.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe 2.14 -0.3 1.85 perf-profile.calltrace.cycles-pp.syscall_return_via_sysret.__sched_yield 
1.85 -0.3 1.58 perf-profile.calltrace.cycles-pp.update_curr_se.update_curr.pick_task_fair.pick_next_task_fair.__schedule 2.04 -0.3 1.78 perf-profile.calltrace.cycles-pp._raw_spin_lock.raw_spin_rq_lock_nested.do_sched_yield.__x64_sys_sched_yield.do_syscall_64 2.02 -0.3 1.76 perf-profile.calltrace.cycles-pp._raw_spin_lock.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64 2.12 -0.3 1.86 perf-profile.calltrace.cycles-pp.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 1.13 -0.1 0.98 perf-profile.calltrace.cycles-pp.yield_task_fair.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.95 ± 4% -0.1 0.82 ± 6% perf-profile.calltrace.cycles-pp.rcu_note_context_switch.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64 1.07 -0.1 0.96 perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 0.77 -0.1 0.66 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_safe_stack.__sched_yield 1.66 -0.1 1.57 perf-profile.calltrace.cycles-pp.__calc_delta.update_curr.pick_task_fair.pick_next_task_fair.__schedule 98.53 +0.3 98.82 perf-profile.calltrace.cycles-pp.__sched_yield 69.38 +3.8 73.16 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.__sched_yield 67.49 +4.0 71.53 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 2.87 +5.0 7.91 perf-profile.calltrace.cycles-pp.update_min_vruntime.update_curr.pick_task_fair.pick_next_task_fair.__schedule 58.92 +5.1 64.03 perf-profile.calltrace.cycles-pp.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 51.95 +6.0 57.99 perf-profile.calltrace.cycles-pp.schedule.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 50.08 +6.3 56.34 perf-profile.calltrace.cycles-pp.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe 32.18 +8.4 40.56 
perf-profile.calltrace.cycles-pp.pick_next_task_fair.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64 30.20 +8.5 38.74 perf-profile.calltrace.cycles-pp.pick_task_fair.pick_next_task_fair.__schedule.schedule.__x64_sys_sched_yield 22.44 +11.5 33.93 perf-profile.calltrace.cycles-pp.update_curr.pick_task_fair.pick_next_task_fair.__schedule.schedule 4.00 -2.3 1.65 ± 2% perf-profile.children.cycles-pp.pick_eevdf 13.84 -1.6 12.26 perf-profile.children.cycles-pp.clear_bhb_loop 6.99 -0.9 6.05 perf-profile.children.cycles-pp.do_sched_yield 6.97 -0.9 6.10 perf-profile.children.cycles-pp.entry_SYSCALL_64 5.18 -0.6 4.62 perf-profile.children.cycles-pp.update_rq_clock 4.22 -0.5 3.68 perf-profile.children.cycles-pp._raw_spin_lock 2.27 ± 6% -0.5 1.76 ± 13% perf-profile.children.cycles-pp.perf_trace_sched_stat_runtime 3.77 -0.5 3.26 perf-profile.children.cycles-pp.update_rq_clock_task 4.04 -0.5 3.56 perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack 4.18 -0.5 3.72 perf-profile.children.cycles-pp.sched_clock_cpu 3.77 -0.4 3.36 perf-profile.children.cycles-pp.sched_clock 2.92 -0.4 2.55 perf-profile.children.cycles-pp.syscall_exit_to_user_mode 3.36 -0.4 3.00 perf-profile.children.cycles-pp.native_sched_clock 2.38 -0.3 2.08 perf-profile.children.cycles-pp.raw_spin_rq_lock_nested 2.26 -0.3 1.96 perf-profile.children.cycles-pp.syscall_return_via_sysret 2.03 -0.3 1.74 perf-profile.children.cycles-pp.update_curr_se 2.29 -0.3 2.01 perf-profile.children.cycles-pp.x64_sys_call 0.97 ± 8% -0.3 0.70 ± 10% perf-profile.children.cycles-pp.testcase 1.15 -0.1 1.00 perf-profile.children.cycles-pp.yield_task_fair 1.01 ± 4% -0.1 0.87 ± 5% perf-profile.children.cycles-pp.rcu_note_context_switch 1.16 -0.1 1.03 perf-profile.children.cycles-pp.syscall_exit_to_user_mode_prepare 0.80 -0.1 0.68 perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack 1.69 -0.1 1.60 perf-profile.children.cycles-pp.__calc_delta 0.50 ± 2% -0.1 0.43 ± 2% perf-profile.children.cycles-pp.raw_spin_rq_unlock 0.47 ± 
3% -0.1 0.40 ± 2% perf-profile.children.cycles-pp.sched_update_worker 0.31 ± 3% -0.0 0.27 ± 5% perf-profile.children.cycles-pp.arch_scale_cpu_capacity 0.25 -0.0 0.22 ± 2% perf-profile.children.cycles-pp.sched_clock_noinstr 99.24 +0.1 99.32 perf-profile.children.cycles-pp.__sched_yield 69.66 +3.7 73.41 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe 67.81 +4.0 71.82 perf-profile.children.cycles-pp.do_syscall_64 59.71 +5.0 64.72 perf-profile.children.cycles-pp.__x64_sys_sched_yield 2.92 +5.0 7.94 perf-profile.children.cycles-pp.update_min_vruntime 51.97 +6.0 58.01 perf-profile.children.cycles-pp.schedule 50.53 +6.2 56.74 perf-profile.children.cycles-pp.__schedule 32.38 +8.4 40.74 perf-profile.children.cycles-pp.pick_next_task_fair 30.70 +8.4 39.14 perf-profile.children.cycles-pp.pick_task_fair 23.52 +11.0 34.54 perf-profile.children.cycles-pp.update_curr 3.60 -2.2 1.36 ± 3% perf-profile.self.cycles-pp.pick_eevdf 13.76 -1.6 12.18 perf-profile.self.cycles-pp.clear_bhb_loop 5.71 -0.8 4.90 perf-profile.self.cycles-pp.__sched_yield 6.12 -0.7 5.40 perf-profile.self.cycles-pp.__schedule 3.95 -0.6 3.40 perf-profile.self.cycles-pp._raw_spin_lock 2.20 ± 6% -0.5 1.70 ± 14% perf-profile.self.cycles-pp.perf_trace_sched_stat_runtime 3.56 -0.5 3.07 perf-profile.self.cycles-pp.update_rq_clock_task 3.92 -0.5 3.44 perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack 3.07 -0.4 2.67 perf-profile.self.cycles-pp.entry_SYSCALL_64 2.96 -0.4 2.59 perf-profile.self.cycles-pp.do_syscall_64 3.28 -0.4 2.92 perf-profile.self.cycles-pp.native_sched_clock 2.41 -0.3 2.09 perf-profile.self.cycles-pp.do_sched_yield 3.59 -0.3 3.28 perf-profile.self.cycles-pp.pick_task_fair 2.25 -0.3 1.96 perf-profile.self.cycles-pp.syscall_return_via_sysret 1.87 -0.3 1.61 perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe 2.18 -0.3 1.92 perf-profile.self.cycles-pp.x64_sys_call 1.85 -0.3 1.59 perf-profile.self.cycles-pp.update_curr_se 1.73 -0.2 1.48 
perf-profile.self.cycles-pp.syscall_exit_to_user_mode 1.44 ± 3% -0.2 1.26 perf-profile.self.cycles-pp.schedule 1.02 -0.2 0.86 perf-profile.self.cycles-pp.yield_task_fair 0.92 ± 4% -0.1 0.79 ± 5% perf-profile.self.cycles-pp.rcu_note_context_switch 0.79 -0.1 0.68 perf-profile.self.cycles-pp.__x64_sys_sched_yield 0.79 -0.1 0.68 perf-profile.self.cycles-pp.entry_SYSCALL_64_safe_stack 1.04 -0.1 0.94 perf-profile.self.cycles-pp.update_rq_clock 1.04 -0.1 0.93 perf-profile.self.cycles-pp.syscall_exit_to_user_mode_prepare 1.65 ± 2% -0.1 1.56 ± 3% perf-profile.self.cycles-pp.pick_next_task_fair 1.64 -0.1 1.56 perf-profile.self.cycles-pp.__calc_delta 0.41 ± 5% -0.1 0.33 ± 5% perf-profile.self.cycles-pp.testcase 0.43 -0.1 0.36 perf-profile.self.cycles-pp.raw_spin_rq_unlock 0.46 ± 3% -0.1 0.40 ± 2% perf-profile.self.cycles-pp.sched_update_worker 0.48 -0.0 0.43 perf-profile.self.cycles-pp.sched_clock_cpu 0.29 -0.0 0.26 ± 2% perf-profile.self.cycles-pp.arch_scale_cpu_capacity 0.26 -0.0 0.23 perf-profile.self.cycles-pp.sched_clock 2.58 +5.3 7.92 perf-profile.self.cycles-pp.update_min_vruntime 8.36 ± 5% +7.1 15.45 ± 2% perf-profile.self.cycles-pp.update_curr *************************************************************************************************** lkp-icl-2sp2: 128 threads 2 sockets Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz (Ice Lake) with 256G memory ========================================================================================= compiler/cpufreq_governor/disk/fs/kconfig/load/rootfs/tbox_group/test/testcase: gcc-12/performance/1BRD_48G/xfs/x86_64-rhel-8.3/3000/debian-12-x86_64-20240206.cgz/lkp-icl-2sp2/disk_rr/aim7 commit: 7266f0a6d3 ("fs/bcachefs: Fix __wait_on_freeing_inode() definition of waitqueue entry") 538d813df3 ("sched/eevdf: Reduce the computation frequency of avg_vruntime") 7266f0a6d3bb73f4 538d813df3945cbc9d6a90ba224 ---------------- --------------------------- %stddev %change %stddev \ | \ 3.288e+09 ± 2% +21.9% 4.008e+09 ± 3% cpuidle..time 2225204 
+34.7% 2996318 cpuidle..usage 65110 -10.0% 58593 ± 3% vmstat.system.cs 164997 -12.3% 144664 ± 3% vmstat.system.in 78.57 +5.4% 82.82 iostat.cpu.idle 19.74 -20.7% 15.66 ± 3% iostat.cpu.system 1.68 ± 2% -9.3% 1.52 iostat.cpu.user 0.10 ± 2% +0.0 0.11 ± 3% mpstat.cpu.all.soft% 20.61 -4.5 16.11 ± 3% mpstat.cpu.all.sys% 1.75 ± 2% -0.2 1.55 mpstat.cpu.all.usr% 38.50 -24.7% 29.00 mpstat.max_utilization_pct 20.45 ± 35% -40.8% 12.10 ± 66% sched_debug.cfs_rq:/.removed.runnable_avg.avg 93.27 ± 14% -32.3% 63.15 ± 50% sched_debug.cfs_rq:/.removed.runnable_avg.stddev 20.45 ± 35% -40.8% 12.10 ± 66% sched_debug.cfs_rq:/.removed.util_avg.avg 93.27 ± 14% -32.3% 63.15 ± 50% sched_debug.cfs_rq:/.removed.util_avg.stddev 626563 -16.4% 524042 aim7.jobs-per-min 28.92 +19.5% 34.54 aim7.time.elapsed_time 28.92 +19.5% 34.54 aim7.time.elapsed_time.max 106217 ± 2% -54.4% 48479 ± 4% aim7.time.involuntary_context_switches 2520 -20.9% 1994 aim7.time.percent_of_cpu_this_job_got 693.43 -6.0% 652.11 aim7.time.system_time 454176 +7.3% 487213 aim7.time.voluntary_context_switches 24117 ± 12% +58.8% 38288 ± 9% meminfo.Active 19543 ± 14% +81.6% 35485 ± 10% meminfo.Active(anon) 4574 ± 6% -38.7% 2803 ± 13% meminfo.Active(file) 864807 +11.5% 964505 meminfo.Dirty 866352 +11.8% 968424 meminfo.Inactive(file) 81362 ± 3% +24.8% 101538 ± 10% meminfo.Mapped 55814 ± 4% +52.6% 85147 ± 9% meminfo.Shmem 2248 ± 9% -36.3% 1432 ± 8% numa-meminfo.node0.Active(file) 435013 +12.2% 488255 numa-meminfo.node0.Dirty 435838 +12.5% 490301 ± 2% numa-meminfo.node0.Inactive(file) 18186 ± 13% +77.5% 32275 ± 8% numa-meminfo.node1.Active 15931 ± 15% +94.2% 30941 ± 9% numa-meminfo.node1.Active(anon) 2254 ± 14% -40.9% 1333 ± 19% numa-meminfo.node1.Active(file) 429153 ± 2% +10.9% 475856 ± 3% numa-meminfo.node1.Dirty 429881 ± 2% +11.1% 477485 ± 3% numa-meminfo.node1.Inactive(file) 48401 ± 5% +57.8% 76356 ± 8% numa-meminfo.node1.Shmem 570.46 ± 4% -33.5% 379.55 ± 18% numa-vmstat.node0.nr_active_file 108480 +13.4% 123017 ± 3% 
numa-vmstat.node0.nr_dirty 108626 +13.7% 123557 ± 3% numa-vmstat.node0.nr_inactive_file 574.96 ± 4% -33.5% 382.41 ± 20% numa-vmstat.node0.nr_zone_active_file 108616 +13.8% 123563 ± 3% numa-vmstat.node0.nr_zone_inactive_file 108480 +13.4% 123031 ± 3% numa-vmstat.node0.nr_zone_write_pending 3992 ± 15% +88.0% 7505 ± 12% numa-vmstat.node1.nr_active_anon 535.60 ± 8% -43.4% 303.23 ± 20% numa-vmstat.node1.nr_active_file 106987 ± 2% +12.1% 119954 ± 3% numa-vmstat.node1.nr_dirty 107155 ± 2% +12.3% 120335 ± 2% numa-vmstat.node1.nr_inactive_file 12109 ± 5% +55.3% 18803 ± 11% numa-vmstat.node1.nr_shmem 3992 ± 15% +88.0% 7505 ± 12% numa-vmstat.node1.nr_zone_active_anon 531.79 ± 7% -44.0% 297.86 ± 22% numa-vmstat.node1.nr_zone_active_file 107160 ± 2% +12.3% 120329 ± 2% numa-vmstat.node1.nr_zone_inactive_file 106991 ± 2% +12.1% 119949 ± 3% numa-vmstat.node1.nr_zone_write_pending 4894 ± 14% +78.8% 8753 ± 8% proc-vmstat.nr_active_anon 1177 ± 7% -47.3% 620.85 ± 11% proc-vmstat.nr_active_file 215885 +12.0% 241890 proc-vmstat.nr_dirty 1028576 +3.3% 1062036 proc-vmstat.nr_file_pages 205186 +2.5% 210328 proc-vmstat.nr_inactive_anon 216249 +12.3% 242846 proc-vmstat.nr_inactive_file 65985 +2.3% 67471 proc-vmstat.nr_kernel_stack 20669 ± 3% +23.9% 25606 ± 7% proc-vmstat.nr_mapped 13964 ± 4% +53.1% 21372 ± 7% proc-vmstat.nr_shmem 34179 +2.8% 35137 proc-vmstat.nr_slab_reclaimable 91842 +1.6% 93353 proc-vmstat.nr_slab_unreclaimable 4894 ± 14% +78.8% 8753 ± 8% proc-vmstat.nr_zone_active_anon 1177 ± 7% -47.2% 621.97 ± 11% proc-vmstat.nr_zone_active_file 205186 +2.5% 210328 proc-vmstat.nr_zone_inactive_anon 216249 +12.3% 242846 proc-vmstat.nr_zone_inactive_file 215886 +12.0% 241890 proc-vmstat.nr_zone_write_pending 1544 ±206% +815.2% 14133 ± 47% proc-vmstat.numa_hint_faults 93.17 ± 76% +9039.0% 8514 ± 63% proc-vmstat.numa_pages_migrated 33520 ± 95% +238.4% 113448 ± 18% proc-vmstat.numa_pte_updates 401728 +27.3% 511275 proc-vmstat.pgfault 93.17 ± 76% +9039.0% 8514 ± 63% 
proc-vmstat.pgmigrate_success 213998 +21.9% 260928 ± 3% proc-vmstat.pgpgout 16275 ± 6% +14.0% 18549 ± 5% proc-vmstat.pgreuse 1.32 ± 2% +10.9% 1.46 ± 2% perf-stat.i.MPKI 1.66e+10 ± 2% -15.6% 1.401e+10 perf-stat.i.branch-instructions 1.82 ± 2% -0.2 1.58 ± 4% perf-stat.i.branch-miss-rate% 63877077 ± 3% -15.6% 53893091 ± 2% perf-stat.i.branch-misses 20.49 +1.8 22.29 perf-stat.i.cache-miss-rate% 1.364e+08 ± 2% -12.8% 1.189e+08 perf-stat.i.cache-misses 5.295e+08 ± 2% -15.6% 4.469e+08 perf-stat.i.cache-references 70732 ± 2% -10.9% 63025 ± 2% perf-stat.i.context-switches 6.905e+10 ± 2% -18.9% 5.602e+10 perf-stat.i.cpu-cycles 2264 ± 2% -31.3% 1556 ± 3% perf-stat.i.cpu-migrations 7.571e+10 ± 2% -15.7% 6.382e+10 perf-stat.i.instructions 36.23 ± 23% -50.3% 18.01 ± 50% perf-stat.i.major-faults 11250 ± 4% +7.1% 12053 ± 3% perf-stat.i.minor-faults 11286 ± 4% +7.0% 12071 ± 3% perf-stat.i.page-faults 1.81 +3.2% 1.86 perf-stat.overall.MPKI 25.78 +0.8 26.62 perf-stat.overall.cache-miss-rate% 0.91 -3.8% 0.88 perf-stat.overall.cpi 505.72 -6.8% 471.32 perf-stat.overall.cycles-between-cache-misses 1.09 +3.9% 1.14 perf-stat.overall.ipc 1.574e+10 ± 2% -13.9% 1.355e+10 ± 2% perf-stat.ps.branch-instructions 59346827 ± 3% -12.8% 51745536 perf-stat.ps.branch-misses 1.296e+08 ± 3% -11.2% 1.151e+08 ± 2% perf-stat.ps.cache-misses 5.028e+08 ± 3% -14.0% 4.324e+08 perf-stat.ps.cache-references 67073 ± 3% -9.1% 60976 ± 2% perf-stat.ps.context-switches 6.555e+10 ± 2% -17.2% 5.425e+10 ± 2% perf-stat.ps.cpu-cycles 2163 ± 2% -29.4% 1527 ± 2% perf-stat.ps.cpu-migrations 7.176e+10 ± 2% -14.0% 6.172e+10 ± 2% perf-stat.ps.instructions 10680 ± 4% +11.3% 11882 ± 2% perf-stat.ps.minor-faults 10719 ± 4% +11.1% 11907 ± 2% perf-stat.ps.page-faults 0.01 ± 30% +511.8% 0.05 ± 19% perf-sched.sch_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 0.00 ±223% +1575.0% 0.01 ± 28% perf-sched.sch_delay.avg.ms.__cond_resched.dput.__fput.__x64_sys_close.do_syscall_64 0.00 
±145% +1020.0% 0.01 ± 28% perf-sched.sch_delay.avg.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open 0.00 ±223% +1133.3% 0.01 ± 7% perf-sched.sch_delay.avg.ms.__cond_resched.task_work_run.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.00 ± 62% -100.0% 0.00 perf-sched.sch_delay.avg.ms.do_nanosleep.hrtimer_nanosleep.__x64_sys_nanosleep.do_syscall_64 0.00 ± 17% +69.6% 0.01 ± 7% perf-sched.sch_delay.avg.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm 0.00 ±145% +1180.0% 0.01 ± 73% perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_exc_page_fault.[unknown].[unknown] 0.01 ± 33% -70.8% 0.00 ± 20% perf-sched.sch_delay.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64 0.01 ± 9% -45.5% 0.00 ± 72% perf-sched.sch_delay.avg.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm 0.00 ± 10% +32.1% 0.01 ± 6% perf-sched.sch_delay.avg.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 0.01 ± 15% +91.4% 0.01 ± 43% perf-sched.sch_delay.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork 0.01 ± 8% +100.0% 0.01 ± 37% perf-sched.sch_delay.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 0.01 ± 11% +38.5% 0.01 perf-sched.sch_delay.avg.ms.schedule_timeout.xfsaild.kthread.ret_from_fork 0.00 ±142% +369.2% 0.01 ± 15% perf-sched.sch_delay.max.ms.__cond_resched.__dentry_kill.shrink_dentry_list.shrink_dcache_parent.d_invalidate 0.73 ± 99% +286.5% 2.84 ± 13% perf-sched.sch_delay.max.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 0.00 ±223% +2900.0% 0.02 ± 44% perf-sched.sch_delay.max.ms.__cond_resched.dput.__fput.__x64_sys_close.do_syscall_64 0.00 ±145% +1640.0% 0.01 ± 40% perf-sched.sch_delay.max.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open 0.00 ±143% +350.0% 0.01 ± 7% perf-sched.sch_delay.max.ms.__cond_resched.process_one_work.worker_thread.kthread.ret_from_fork 0.00 ±223% +1750.0% 0.02 ± 5% 
perf-sched.sch_delay.max.ms.__cond_resched.task_work_run.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.01 ± 88% -100.0% 0.00 perf-sched.sch_delay.max.ms.do_nanosleep.hrtimer_nanosleep.__x64_sys_nanosleep.do_syscall_64 0.01 ± 11% +2974.3% 0.36 ± 40% perf-sched.sch_delay.max.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 0.01 ± 11% +100.0% 0.01 ± 14% perf-sched.sch_delay.max.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm 0.00 ±145% +1480.0% 0.01 ± 65% perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_exc_page_fault.[unknown].[unknown] 0.01 ± 18% +5433.3% 0.58 ±133% perf-sched.sch_delay.max.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll 0.01 ± 10% +516.3% 0.05 ± 88% perf-sched.sch_delay.max.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 0.01 ± 16% +708.9% 0.06 ±144% perf-sched.sch_delay.max.ms.schedule_timeout.kcompactd.kthread.ret_from_fork 0.01 ± 21% +43222.4% 4.19 ±127% perf-sched.sch_delay.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 0.04 ±102% +79610.1% 30.29 ±221% perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 0.01 ± 8% +378.5% 0.05 ± 13% perf-sched.total_sch_delay.average.ms 4.82 ± 31% +3815.7% 188.77 ± 2% perf-sched.total_sch_delay.max.ms 2004 ± 24% +11113.1% 224730 perf-sched.total_wait_and_delay.count.ms 1367 ± 41% +246.4% 4735 ± 3% perf-sched.total_wait_and_delay.max.ms 1367 ± 41% +246.4% 4735 ± 3% perf-sched.total_wait_time.max.ms 3.25 ± 53% -100.0% 0.00 perf-sched.wait_and_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 166.61 ±141% +349.9% 749.60 ± 10% perf-sched.wait_and_delay.avg.ms.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 111.12 ±141% +570.0% 744.49 ± 10% perf-sched.wait_and_delay.avg.ms.do_nanosleep.hrtimer_nanosleep.common_nsleep.__x64_sys_clock_nanosleep 0.29 ± 4% +457.1% 1.64 ± 14% 
perf-sched.wait_and_delay.avg.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 77.78 ±106% +198.6% 232.23 ± 3% perf-sched.wait_and_delay.avg.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm 130.49 ± 36% +216.7% 413.24 ± 7% perf-sched.wait_and_delay.avg.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait 0.47 ± 5% -100.0% 0.00 perf-sched.wait_and_delay.avg.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 252.00 ± 50% +80.0% 453.54 perf-sched.wait_and_delay.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork 19.90 ± 18% -79.4% 4.10 perf-sched.wait_and_delay.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 48.98 -100.0% 0.00 perf-sched.wait_and_delay.avg.ms.schedule_timeout.xfsaild.kthread.ret_from_fork 239.32 ± 81% -97.8% 5.28 ± 9% perf-sched.wait_and_delay.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 586.67 ± 25% -100.0% 0.00 perf-sched.wait_and_delay.count.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 1.33 ±141% +650.0% 10.00 perf-sched.wait_and_delay.count.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 1.00 ±141% +416.7% 5.17 ± 7% perf-sched.wait_and_delay.count.do_nanosleep.hrtimer_nanosleep.common_nsleep.__x64_sys_clock_nanosleep 31.17 ± 11% -100.0% 0.00 perf-sched.wait_and_delay.count.do_task_dead.do_exit.__x64_sys_exit.x64_sys_call.do_syscall_64 66.17 ± 14% +4026.4% 2730 ± 19% perf-sched.wait_and_delay.count.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call 51.67 ± 21% +4399.7% 2324 ± 18% perf-sched.wait_and_delay.count.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 3.00 ±107% +522.2% 18.67 ± 5% perf-sched.wait_and_delay.count.irq_thread.kthread.ret_from_fork.ret_from_fork_asm 169.50 ± 22% +1190.6% 2187 ± 4% perf-sched.wait_and_delay.count.pipe_read.vfs_read.ksys_read.do_syscall_64 3.83 ±102% +3339.1% 131.83 ± 3% 
perf-sched.wait_and_delay.count.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll 31.50 ± 25% -100.0% 0.00 perf-sched.wait_and_delay.count.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 4.67 ± 58% +328.6% 20.00 perf-sched.wait_and_delay.count.schedule_timeout.kcompactd.kthread.ret_from_fork 64.17 ± 37% +1794.8% 1215 perf-sched.wait_and_delay.count.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 27.50 ± 38% -100.0% 0.00 perf-sched.wait_and_delay.count.schedule_timeout.xfsaild.kthread.ret_from_fork 647.50 ± 30% +380.3% 3109 ± 5% perf-sched.wait_and_delay.count.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 1.00 ±223% +1.8e+05% 1760 perf-sched.wait_and_delay.count.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 217.17 ± 32% +26170.7% 57051 perf-sched.wait_and_delay.count.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 1249 ± 51% -100.0% 0.00 perf-sched.wait_and_delay.max.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 1.30 ± 17% +28306.6% 368.62 ± 31% perf-sched.wait_and_delay.max.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 283.33 ±104% +246.8% 982.65 ± 3% perf-sched.wait_and_delay.max.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm 336.68 ±140% +957.4% 3559 ± 50% perf-sched.wait_and_delay.max.ms.pipe_read.vfs_read.ksys_read.do_syscall_64 333.67 ±141% +1049.9% 3837 ± 38% perf-sched.wait_and_delay.max.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll 1.91 -100.0% 0.00 perf-sched.wait_and_delay.max.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 329.52 ± 29% -82.1% 59.00 ±104% perf-sched.wait_and_delay.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 52.00 -100.0% 0.00 perf-sched.wait_and_delay.max.ms.schedule_timeout.xfsaild.kthread.ret_from_fork 975.69 ± 20% +150.2% 2440 ± 3% 
perf-sched.wait_and_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 154.83 ±223% +458.4% 864.51 ± 35% perf-sched.wait_and_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 1312 ± 47% +222.7% 4234 ± 8% perf-sched.wait_and_delay.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 0.44 ±223% +25614.4% 112.42 ± 18% perf-sched.wait_time.avg.ms.__cond_resched.dput.__fput.__x64_sys_close.do_syscall_64 0.00 ±223% +1.7e+06% 54.23 ±119% perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_empty_file.path_openat.do_filp_open 166.61 ±141% +349.9% 749.60 ± 10% perf-sched.wait_time.avg.ms.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 111.12 ±141% +570.0% 744.48 ± 10% perf-sched.wait_time.avg.ms.do_nanosleep.hrtimer_nanosleep.common_nsleep.__x64_sys_clock_nanosleep 0.29 ± 4% +467.6% 1.63 ± 14% perf-sched.wait_time.avg.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 94.46 ± 72% +145.9% 232.23 ± 3% perf-sched.wait_time.avg.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm 0.01 ±144% +2.2e+06% 127.86 ± 6% perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown] 0.00 ±223% +4333.3% 0.02 ± 27% perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown] 130.47 ± 36% +216.7% 413.23 ± 7% perf-sched.wait_time.avg.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait 0.46 ± 5% +53.4% 0.71 perf-sched.wait_time.avg.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 252.00 ± 50% +80.0% 453.53 perf-sched.wait_time.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork 19.89 ± 18% -79.5% 4.09 perf-sched.wait_time.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 239.29 ± 81% -97.8% 5.25 ± 9% perf-sched.wait_time.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 0.44 ±223% +39762.2% 174.26 ± 8% 
perf-sched.wait_time.max.ms.__cond_resched.dput.__fput.__x64_sys_close.do_syscall_64 0.00 ±223% +2.8e+06% 88.88 ± 99% perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_empty_file.path_openat.do_filp_open 1.29 ± 17% +28522.9% 368.61 ± 31% perf-sched.wait_time.max.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 349.88 ± 67% +180.9% 982.64 ± 3% perf-sched.wait_time.max.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm 0.02 ±191% +1.1e+06% 187.74 perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown] 0.00 ±223% +6166.7% 0.03 ± 47% perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown] 336.67 ±140% +957.4% 3559 ± 50% perf-sched.wait_time.max.ms.pipe_read.vfs_read.ksys_read.do_syscall_64 4.66 ± 10% -42.8% 2.66 ± 70% perf-sched.wait_time.max.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm 333.67 ±141% +1049.9% 3836 ± 38% perf-sched.wait_time.max.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll 1.91 +40.7% 2.69 ± 2% perf-sched.wait_time.max.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 329.51 ± 29% -82.6% 57.33 ±109% perf-sched.wait_time.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 975.68 ± 20% +150.2% 2440 ± 3% perf-sched.wait_time.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 154.84 ±223% +458.3% 864.50 ± 35% perf-sched.wait_time.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 1312 ± 47% +222.7% 4233 ± 8% perf-sched.wait_time.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 7.94 -1.8 6.10 ± 3% perf-profile.calltrace.cycles-pp.down_write.open_last_lookups.path_openat.do_filp_open.do_sys_openat2 7.94 -1.8 6.10 ± 3% perf-profile.calltrace.cycles-pp.rwsem_down_write_slowpath.down_write.open_last_lookups.path_openat.do_filp_open 9.08 -1.8 7.25 ± 3% 
perf-profile.calltrace.cycles-pp.open_last_lookups.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat 9.20 -1.8 7.36 ± 3% perf-profile.calltrace.cycles-pp.do_sys_openat2.__x64_sys_creat.do_syscall_64.entry_SYSCALL_64_after_hwframe.creat64 9.16 -1.8 7.33 ± 3% perf-profile.calltrace.cycles-pp.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat.do_syscall_64 9.16 -1.8 7.33 ± 3% perf-profile.calltrace.cycles-pp.do_filp_open.do_sys_openat2.__x64_sys_creat.do_syscall_64.entry_SYSCALL_64_after_hwframe 9.20 -1.8 7.37 ± 3% perf-profile.calltrace.cycles-pp.__x64_sys_creat.do_syscall_64.entry_SYSCALL_64_after_hwframe.creat64 9.26 -1.8 7.44 ± 3% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.creat64 9.26 -1.8 7.44 ± 3% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.creat64 9.28 -1.8 7.46 ± 3% perf-profile.calltrace.cycles-pp.creat64 7.29 -1.8 5.51 ± 4% perf-profile.calltrace.cycles-pp.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.open_last_lookups.path_openat 8.80 -1.6 7.17 ± 3% perf-profile.calltrace.cycles-pp.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink 8.80 -1.6 7.17 ± 3% perf-profile.calltrace.cycles-pp.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink 8.86 -1.6 7.24 ± 3% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink 8.86 -1.6 7.24 ± 3% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.unlink 8.89 -1.6 7.27 ± 3% perf-profile.calltrace.cycles-pp.unlink 7.94 -1.6 6.33 ± 3% perf-profile.calltrace.cycles-pp.down_write.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe 7.94 -1.6 6.32 ± 3% perf-profile.calltrace.cycles-pp.rwsem_down_write_slowpath.down_write.do_unlinkat.__x64_sys_unlink.do_syscall_64 6.42 -1.6 4.85 ± 4% perf-profile.calltrace.cycles-pp.osq_lock.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.open_last_lookups 7.28 -1.5 5.74 ± 4% 
perf-profile.calltrace.cycles-pp.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.do_unlinkat.__x64_sys_unlink 6.46 -1.4 5.09 ± 4% perf-profile.calltrace.cycles-pp.osq_lock.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.do_unlinkat 0.85 -0.2 0.65 perf-profile.calltrace.cycles-pp.rwsem_spin_on_owner.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.open_last_lookups 0.79 -0.2 0.63 perf-profile.calltrace.cycles-pp.rwsem_spin_on_owner.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.do_unlinkat 1.75 -0.2 1.60 ± 4% perf-profile.calltrace.cycles-pp.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 1.78 -0.2 1.63 ± 4% perf-profile.calltrace.cycles-pp.kthread.ret_from_fork.ret_from_fork_asm 1.78 -0.1 1.64 ± 4% perf-profile.calltrace.cycles-pp.ret_from_fork.ret_from_fork_asm 1.78 -0.1 1.64 ± 4% perf-profile.calltrace.cycles-pp.ret_from_fork_asm 1.31 -0.1 1.23 perf-profile.calltrace.cycles-pp.xfs_inactive.xfs_inodegc_worker.process_one_work.worker_thread.kthread 1.33 -0.1 1.25 perf-profile.calltrace.cycles-pp.xfs_inodegc_worker.process_one_work.worker_thread.kthread.ret_from_fork 0.96 -0.1 0.88 perf-profile.calltrace.cycles-pp.xfs_inactive_ifree.xfs_inactive.xfs_inodegc_worker.process_one_work.worker_thread 0.69 -0.1 0.63 perf-profile.calltrace.cycles-pp.xfs_ifree.xfs_inactive_ifree.xfs_inactive.xfs_inodegc_worker.process_one_work 0.65 -0.1 0.60 perf-profile.calltrace.cycles-pp.xfs_difree.xfs_inode_uninit.xfs_ifree.xfs_inactive_ifree.xfs_inactive 0.68 -0.1 0.62 perf-profile.calltrace.cycles-pp.xfs_inode_uninit.xfs_ifree.xfs_inactive_ifree.xfs_inactive.xfs_inodegc_worker 0.70 +0.0 0.72 perf-profile.calltrace.cycles-pp.xfs_ilock.xfs_file_buffered_read.xfs_file_read_iter.vfs_read.ksys_read 0.51 +0.0 0.53 perf-profile.calltrace.cycles-pp.down_read.xfs_ilock.xfs_file_buffered_read.xfs_file_read_iter.vfs_read 1.03 +0.0 1.06 
perf-profile.calltrace.cycles-pp.__alloc_pages_noprof.alloc_pages_mpol_noprof.folio_alloc_noprof.__filemap_get_folio.iomap_write_begin 0.51 +0.0 0.54 ± 2% perf-profile.calltrace.cycles-pp.down_write.xfs_ilock_for_iomap.xfs_buffered_write_iomap_begin.iomap_iter.iomap_file_buffered_write 0.54 +0.0 0.57 perf-profile.calltrace.cycles-pp.xas_load.filemap_get_read_batch.filemap_get_pages.filemap_read.xfs_file_buffered_read 0.61 +0.0 0.64 perf-profile.calltrace.cycles-pp.xfs_iunlock.xfs_file_buffered_write.vfs_write.ksys_write.do_syscall_64 1.19 +0.0 1.22 perf-profile.calltrace.cycles-pp.alloc_pages_mpol_noprof.folio_alloc_noprof.__filemap_get_folio.iomap_write_begin.iomap_write_iter 1.26 +0.0 1.29 perf-profile.calltrace.cycles-pp.folio_alloc_noprof.__filemap_get_folio.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write 0.78 +0.0 0.82 perf-profile.calltrace.cycles-pp.xfs_ilock_for_iomap.xfs_buffered_write_iomap_begin.iomap_iter.iomap_file_buffered_write.xfs_file_buffered_write 1.00 +0.0 1.04 perf-profile.calltrace.cycles-pp.filemap_get_entry.__filemap_get_folio.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write 1.04 +0.0 1.07 perf-profile.calltrace.cycles-pp.fault_in_readable.fault_in_iov_iter_readable.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write 1.19 +0.0 1.23 perf-profile.calltrace.cycles-pp.fault_in_iov_iter_readable.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write 1.53 +0.0 1.57 perf-profile.calltrace.cycles-pp.clear_bhb_loop.read 1.11 +0.0 1.15 perf-profile.calltrace.cycles-pp.iomap_iter_advance.iomap_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write 1.02 +0.0 1.06 perf-profile.calltrace.cycles-pp.ksys_lseek.do_syscall_64.entry_SYSCALL_64_after_hwframe.llseek 1.60 +0.0 1.64 perf-profile.calltrace.cycles-pp.clear_bhb_loop.write 1.43 +0.0 1.48 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.llseek 1.64 +0.1 1.70 
perf-profile.calltrace.cycles-pp.copy_page_from_iter_atomic.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write 1.54 +0.1 1.59 perf-profile.calltrace.cycles-pp.clear_bhb_loop.llseek 0.86 +0.1 0.91 perf-profile.calltrace.cycles-pp.iomap_set_range_uptodate.__iomap_write_begin.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write 1.89 +0.1 1.95 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.llseek 1.99 +0.1 2.04 perf-profile.calltrace.cycles-pp.filemap_get_read_batch.filemap_get_pages.filemap_read.xfs_file_buffered_read.xfs_file_read_iter 2.34 +0.1 2.40 perf-profile.calltrace.cycles-pp.filemap_get_pages.filemap_read.xfs_file_buffered_read.xfs_file_read_iter.vfs_read 2.14 +0.1 2.21 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.llseek 1.57 +0.1 1.64 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.write 1.62 +0.1 1.70 ± 2% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.read 2.37 +0.1 2.51 perf-profile.calltrace.cycles-pp.iomap_set_range_uptodate.iomap_write_end.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write 4.01 +0.2 4.16 perf-profile.calltrace.cycles-pp.xfs_buffered_write_iomap_begin.iomap_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write 6.70 +0.2 6.85 perf-profile.calltrace.cycles-pp.__filemap_get_folio.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write 2.68 +0.2 2.85 perf-profile.calltrace.cycles-pp.memset_orig.zero_user_segments.__iomap_write_begin.iomap_write_begin.iomap_write_iter 2.78 +0.2 2.95 perf-profile.calltrace.cycles-pp.zero_user_segments.__iomap_write_begin.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write 6.22 +0.2 6.41 perf-profile.calltrace.cycles-pp.llseek 4.69 +0.2 4.89 perf-profile.calltrace.cycles-pp.iomap_write_end.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write 5.90 +0.2 6.11 
perf-profile.calltrace.cycles-pp.iomap_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write.ksys_write 4.23 +0.2 4.47 perf-profile.calltrace.cycles-pp.__iomap_write_begin.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write 4.88 +0.3 5.15 perf-profile.calltrace.cycles-pp._copy_to_iter.copy_page_to_iter.filemap_read.xfs_file_buffered_read.xfs_file_read_iter 5.10 +0.3 5.38 perf-profile.calltrace.cycles-pp.copy_page_to_iter.filemap_read.xfs_file_buffered_read.xfs_file_read_iter.vfs_read 14.66 +0.4 15.06 perf-profile.calltrace.cycles-pp.filemap_read.xfs_file_buffered_read.xfs_file_read_iter.vfs_read.ksys_read 1.01 ± 2% +0.4 1.42 ± 2% perf-profile.calltrace.cycles-pp.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt.acpi_idle_enter.cpuidle_enter_state 0.08 ±223% +0.4 0.51 perf-profile.calltrace.cycles-pp.xfs_break_layouts.xfs_file_write_checks.xfs_file_buffered_write.vfs_write.ksys_write 11.72 +0.4 12.16 perf-profile.calltrace.cycles-pp.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write 16.26 +0.4 16.70 perf-profile.calltrace.cycles-pp.xfs_file_buffered_read.xfs_file_read_iter.vfs_read.ksys_read.do_syscall_64 16.66 +0.4 17.10 perf-profile.calltrace.cycles-pp.xfs_file_read_iter.vfs_read.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe 18.40 +0.5 18.89 perf-profile.calltrace.cycles-pp.vfs_read.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read 19.10 +0.5 19.62 perf-profile.calltrace.cycles-pp.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read 1.37 +0.5 1.90 ± 2% perf-profile.calltrace.cycles-pp.acpi_safe_halt.acpi_idle_enter.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call 0.17 ±141% +0.5 0.70 ± 5% perf-profile.calltrace.cycles-pp.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt.acpi_idle_enter 0.00 +0.5 0.53 
perf-profile.calltrace.cycles-pp.handle_softirqs.__irq_exit_rcu.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt 0.00 +0.6 0.55 perf-profile.calltrace.cycles-pp.__irq_exit_rcu.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt.acpi_idle_enter 20.13 +0.6 20.68 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.read 20.38 +0.6 20.94 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.read 24.42 +0.7 25.08 perf-profile.calltrace.cycles-pp.read 0.00 +0.7 0.68 ± 5% perf-profile.calltrace.cycles-pp.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt 22.42 +0.8 23.27 perf-profile.calltrace.cycles-pp.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write.ksys_write 2.11 +0.9 2.99 perf-profile.calltrace.cycles-pp.acpi_idle_enter.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle 2.16 +0.9 3.08 perf-profile.calltrace.cycles-pp.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry 2.17 +0.9 3.09 perf-profile.calltrace.cycles-pp.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary 2.32 +1.0 3.29 perf-profile.calltrace.cycles-pp.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary.common_startup_64 2.52 +1.0 3.51 perf-profile.calltrace.cycles-pp.cpu_startup_entry.start_secondary.common_startup_64 2.52 +1.0 3.51 perf-profile.calltrace.cycles-pp.start_secondary.common_startup_64 2.51 +1.0 3.50 perf-profile.calltrace.cycles-pp.do_idle.cpu_startup_entry.start_secondary.common_startup_64 2.54 +1.0 3.54 perf-profile.calltrace.cycles-pp.common_startup_64 2.06 ± 2% +1.1 3.13 perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.acpi_safe_halt.acpi_idle_enter.cpuidle_enter_state.cpuidle_enter 29.32 +1.1 30.42 perf-profile.calltrace.cycles-pp.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write.ksys_write.do_syscall_64 33.36 +1.2 
34.57 perf-profile.calltrace.cycles-pp.xfs_file_buffered_write.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe 36.26 +1.3 37.56 perf-profile.calltrace.cycles-pp.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write 37.09 +1.4 38.44 perf-profile.calltrace.cycles-pp.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write 38.13 +1.4 39.53 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.write 38.38 +1.4 39.79 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.write 41.30 +2.0 43.33 perf-profile.calltrace.cycles-pp.write 15.88 -3.5 12.42 ± 3% perf-profile.children.cycles-pp.rwsem_down_write_slowpath 17.02 -3.4 13.59 ± 3% perf-profile.children.cycles-pp.down_write 14.58 -3.3 11.25 ± 4% perf-profile.children.cycles-pp.rwsem_optimistic_spin 12.90 -3.0 9.94 ± 4% perf-profile.children.cycles-pp.osq_lock 9.09 -1.8 7.26 ± 3% perf-profile.children.cycles-pp.open_last_lookups 9.24 -1.8 7.41 ± 3% perf-profile.children.cycles-pp.path_openat 9.20 -1.8 7.37 ± 3% perf-profile.children.cycles-pp.__x64_sys_creat 9.30 -1.8 7.47 ± 3% perf-profile.children.cycles-pp.do_sys_openat2 9.24 -1.8 7.41 ± 3% perf-profile.children.cycles-pp.do_filp_open 9.29 -1.8 7.48 ± 3% perf-profile.children.cycles-pp.creat64 8.80 -1.6 7.17 ± 3% perf-profile.children.cycles-pp.__x64_sys_unlink 8.80 -1.6 7.17 ± 3% perf-profile.children.cycles-pp.do_unlinkat 8.90 -1.6 7.28 ± 3% perf-profile.children.cycles-pp.unlink 83.14 -1.3 81.79 perf-profile.children.cycles-pp.do_syscall_64 83.86 -1.3 82.52 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe 2.29 -0.4 1.86 perf-profile.children.cycles-pp.rwsem_spin_on_owner 1.06 -0.3 0.81 perf-profile.children.cycles-pp.pick_next_task_fair 1.01 -0.2 0.77 ± 2% perf-profile.children.cycles-pp.sched_balance_newidle 1.28 -0.2 1.04 perf-profile.children.cycles-pp.schedule 1.42 -0.2 1.19 perf-profile.children.cycles-pp.__schedule 1.01 -0.2 0.80 ± 2% 
perf-profile.children.cycles-pp.sched_balance_rq 0.86 -0.2 0.68 perf-profile.children.cycles-pp.sched_balance_find_src_group 0.85 -0.2 0.67 perf-profile.children.cycles-pp.update_sd_lb_stats 0.79 -0.2 0.62 perf-profile.children.cycles-pp.update_sg_lb_stats 1.78 -0.2 1.63 ± 4% perf-profile.children.cycles-pp.kthread 1.75 -0.2 1.60 ± 4% perf-profile.children.cycles-pp.worker_thread 1.78 -0.1 1.64 ± 4% perf-profile.children.cycles-pp.ret_from_fork 1.78 -0.1 1.64 ± 4% perf-profile.children.cycles-pp.ret_from_fork_asm 1.31 -0.1 1.23 perf-profile.children.cycles-pp.xfs_inactive 1.33 -0.1 1.25 perf-profile.children.cycles-pp.xfs_inodegc_worker 0.48 -0.1 0.41 perf-profile.children.cycles-pp.___down_common 0.48 -0.1 0.41 perf-profile.children.cycles-pp.__down 0.54 -0.1 0.47 perf-profile.children.cycles-pp.down 0.48 -0.1 0.41 perf-profile.children.cycles-pp.schedule_timeout 0.55 -0.1 0.48 perf-profile.children.cycles-pp.xfs_buf_lock 0.96 -0.1 0.88 perf-profile.children.cycles-pp.xfs_inactive_ifree 0.75 -0.1 0.68 perf-profile.children.cycles-pp.xfs_buf_get_map 0.63 -0.1 0.56 perf-profile.children.cycles-pp.xfs_read_agi 0.77 -0.1 0.70 perf-profile.children.cycles-pp.xfs_buf_read_map 0.56 -0.1 0.50 perf-profile.children.cycles-pp.xfs_buf_find_lock 0.67 -0.1 0.60 perf-profile.children.cycles-pp.xfs_buf_lookup 0.91 -0.1 0.84 perf-profile.children.cycles-pp.xfs_trans_read_buf_map 0.58 -0.1 0.52 perf-profile.children.cycles-pp.xfs_ialloc_read_agi 0.69 -0.1 0.63 perf-profile.children.cycles-pp.xfs_ifree 0.59 -0.1 0.53 ± 2% perf-profile.children.cycles-pp.schedule_preempt_disabled 0.65 -0.1 0.60 perf-profile.children.cycles-pp.xfs_difree 0.68 -0.1 0.62 perf-profile.children.cycles-pp.xfs_inode_uninit 0.27 ± 4% -0.0 0.23 ± 3% perf-profile.children.cycles-pp.task_tick_fair 0.17 ± 2% -0.0 0.15 ± 2% perf-profile.children.cycles-pp.update_load_avg 0.14 ± 3% -0.0 0.11 ± 4% perf-profile.children.cycles-pp.idle_cpu 0.29 -0.0 0.27 perf-profile.children.cycles-pp.xfs_buf_item_format_segment 
0.08 ± 4% -0.0 0.06 ± 7% perf-profile.children.cycles-pp.cpu_util 0.30 ± 2% -0.0 0.28 ± 2% perf-profile.children.cycles-pp.xfs_buf_item_format 0.12 ± 5% -0.0 0.10 ± 3% perf-profile.children.cycles-pp.update_cfs_group 0.09 ± 4% -0.0 0.08 ± 6% perf-profile.children.cycles-pp._find_next_and_bit 0.18 ± 2% -0.0 0.17 ± 2% perf-profile.children.cycles-pp.xlog_copy_iovec 0.06 +0.0 0.07 perf-profile.children.cycles-pp.kmem_cache_alloc_lru_noprof 0.23 +0.0 0.24 perf-profile.children.cycles-pp.xfs_buffered_write_iomap_end 0.29 +0.0 0.30 perf-profile.children.cycles-pp._raw_spin_lock 0.05 +0.0 0.06 ± 7% perf-profile.children.cycles-pp.native_sched_clock 0.13 ± 3% +0.0 0.15 ± 3% perf-profile.children.cycles-pp.sched_ttwu_pending 0.16 ± 2% +0.0 0.18 ± 2% perf-profile.children.cycles-pp.__flush_smp_call_function_queue 0.08 ± 6% +0.0 0.09 ± 4% perf-profile.children.cycles-pp.__update_blocked_fair 0.09 +0.0 0.11 ± 3% perf-profile.children.cycles-pp.sched_balance_softirq 0.07 +0.0 0.09 ± 4% perf-profile.children.cycles-pp.update_rq_clock 0.44 +0.0 0.46 perf-profile.children.cycles-pp.generic_write_checks 0.23 +0.0 0.25 ± 2% perf-profile.children.cycles-pp.try_to_wake_up 0.48 +0.0 0.50 perf-profile.children.cycles-pp.xfs_iext_lookup_extent 0.55 +0.0 0.57 perf-profile.children.cycles-pp.xfs_break_layouts 0.10 ± 4% +0.0 0.13 ± 2% perf-profile.children.cycles-pp.kick_pool 0.12 ± 3% +0.0 0.14 ± 2% perf-profile.children.cycles-pp.__queue_work 0.14 ± 2% +0.0 0.17 ± 2% perf-profile.children.cycles-pp.run_timer_softirq 0.14 +0.0 0.16 ± 3% perf-profile.children.cycles-pp.__run_timers 0.12 ± 4% +0.0 0.15 perf-profile.children.cycles-pp.sched_balance_update_blocked_averages 0.06 ± 8% +0.0 0.08 ± 4% perf-profile.children.cycles-pp.enqueue_dl_entity 0.68 +0.0 0.71 perf-profile.children.cycles-pp.xas_store 0.12 ± 3% +0.0 0.15 ± 2% perf-profile.children.cycles-pp.call_timer_fn 1.05 +0.0 1.07 perf-profile.children.cycles-pp.__alloc_pages_noprof 0.05 +0.0 0.08 
perf-profile.children.cycles-pp.hrtimer_start_range_ns 0.06 ± 8% +0.0 0.09 ± 5% perf-profile.children.cycles-pp.dl_server_start 1.02 +0.0 1.05 perf-profile.children.cycles-pp.up_write 0.17 +0.0 0.20 perf-profile.children.cycles-pp.ttwu_do_activate 0.10 ± 3% +0.0 0.13 ± 7% perf-profile.children.cycles-pp.menu_select 0.16 ± 3% +0.0 0.19 perf-profile.children.cycles-pp.enqueue_task_fair 1.20 +0.0 1.24 perf-profile.children.cycles-pp.__cond_resched 1.21 +0.0 1.24 perf-profile.children.cycles-pp.alloc_pages_mpol_noprof 1.27 +0.0 1.30 perf-profile.children.cycles-pp.folio_alloc_noprof 1.10 +0.0 1.13 perf-profile.children.cycles-pp.fault_in_readable 1.14 +0.0 1.18 perf-profile.children.cycles-pp.iomap_iter_advance 0.10 ± 4% +0.0 0.14 ± 2% perf-profile.children.cycles-pp.sched_balance_domains 1.06 +0.0 1.09 perf-profile.children.cycles-pp.filemap_get_entry 1.22 +0.0 1.25 perf-profile.children.cycles-pp.xas_load 0.82 +0.0 0.86 perf-profile.children.cycles-pp.xfs_ilock_for_iomap 1.68 +0.0 1.72 perf-profile.children.cycles-pp.xfs_ilock 1.24 +0.0 1.28 perf-profile.children.cycles-pp.fault_in_iov_iter_readable 0.21 ± 2% +0.0 0.26 perf-profile.children.cycles-pp.sysvec_call_function_single 1.09 +0.0 1.14 perf-profile.children.cycles-pp.ksys_lseek 0.88 +0.0 0.93 perf-profile.children.cycles-pp.__mod_memcg_lruvec_state 1.67 +0.1 1.72 perf-profile.children.cycles-pp.copy_page_from_iter_atomic 0.47 +0.1 0.52 perf-profile.children.cycles-pp.task_work_run 0.46 +0.1 0.51 perf-profile.children.cycles-pp.task_mm_cid_work 0.73 +0.1 0.78 perf-profile.children.cycles-pp.update_process_times 2.04 +0.1 2.10 perf-profile.children.cycles-pp.filemap_get_read_batch 0.06 ± 19% +0.1 0.12 ± 14% perf-profile.children.cycles-pp.tick_irq_enter 2.22 +0.1 2.28 perf-profile.children.cycles-pp.xfs_file_write_checks 1.86 +0.1 1.92 perf-profile.children.cycles-pp.xfs_iunlock 0.07 ± 15% +0.1 0.13 ± 12% perf-profile.children.cycles-pp.irq_enter_rcu 0.01 ±223% +0.1 0.07 
perf-profile.children.cycles-pp.start_dl_timer 2.38 +0.1 2.44 perf-profile.children.cycles-pp.filemap_get_pages 0.00 +0.1 0.07 ± 7% perf-profile.children.cycles-pp.__hrtimer_start_range_ns 1.30 +0.1 1.37 perf-profile.children.cycles-pp.syscall_exit_to_user_mode 0.44 +0.1 0.51 ± 2% perf-profile.children.cycles-pp.asm_sysvec_call_function_single 0.81 +0.1 0.88 ± 2% perf-profile.children.cycles-pp.tick_nohz_handler 0.20 ± 11% +0.1 0.28 ± 10% perf-profile.children.cycles-pp.clockevents_program_event 0.85 +0.1 0.93 ± 2% perf-profile.children.cycles-pp.__hrtimer_run_queues 1.75 +0.1 1.85 ± 3% perf-profile.children.cycles-pp.fdget_pos 0.60 +0.1 0.71 perf-profile.children.cycles-pp.handle_softirqs 0.62 +0.1 0.74 perf-profile.children.cycles-pp.__irq_exit_rcu 2.76 +0.1 2.88 perf-profile.children.cycles-pp.entry_SYSCALL_64 0.24 ± 6% +0.1 0.38 ± 9% perf-profile.children.cycles-pp.ktime_get 4.73 +0.1 4.88 perf-profile.children.cycles-pp.clear_bhb_loop 4.22 +0.2 4.37 perf-profile.children.cycles-pp.xfs_buffered_write_iomap_begin 6.80 +0.2 6.95 perf-profile.children.cycles-pp.__filemap_get_folio 2.70 +0.2 2.87 perf-profile.children.cycles-pp.memset_orig 2.79 +0.2 2.96 perf-profile.children.cycles-pp.zero_user_segments 1.10 ± 2% +0.2 1.29 ± 4% perf-profile.children.cycles-pp.hrtimer_interrupt 1.13 ± 2% +0.2 1.32 ± 4% perf-profile.children.cycles-pp.__sysvec_apic_timer_interrupt 3.25 +0.2 3.44 perf-profile.children.cycles-pp.iomap_set_range_uptodate 6.25 +0.2 6.46 perf-profile.children.cycles-pp.llseek 4.74 +0.2 4.95 perf-profile.children.cycles-pp.iomap_write_end 6.04 +0.2 6.25 perf-profile.children.cycles-pp.iomap_iter 4.30 +0.2 4.55 perf-profile.children.cycles-pp.__iomap_write_begin 4.90 +0.3 5.17 perf-profile.children.cycles-pp._copy_to_iter 5.14 +0.3 5.42 perf-profile.children.cycles-pp.copy_page_to_iter 1.84 +0.4 2.20 ± 2% perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt 14.77 +0.4 15.17 perf-profile.children.cycles-pp.filemap_read 16.70 +0.4 17.13 
perf-profile.children.cycles-pp.xfs_file_read_iter 11.81 +0.4 12.25 perf-profile.children.cycles-pp.iomap_write_begin 16.35 +0.4 16.79 perf-profile.children.cycles-pp.xfs_file_buffered_read 18.46 +0.5 18.94 perf-profile.children.cycles-pp.vfs_read 19.18 +0.5 19.70 perf-profile.children.cycles-pp.ksys_read 24.52 +0.7 25.18 perf-profile.children.cycles-pp.read 2.45 +0.7 3.16 perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt 22.61 +0.9 23.46 perf-profile.children.cycles-pp.iomap_write_iter 2.12 +0.9 3.01 perf-profile.children.cycles-pp.acpi_safe_halt 2.12 +0.9 3.02 perf-profile.children.cycles-pp.acpi_idle_enter 2.18 +0.9 3.11 perf-profile.children.cycles-pp.cpuidle_enter_state 2.19 +0.9 3.12 perf-profile.children.cycles-pp.cpuidle_enter 2.34 +1.0 3.33 perf-profile.children.cycles-pp.cpuidle_idle_call 2.52 +1.0 3.51 perf-profile.children.cycles-pp.start_secondary 2.54 +1.0 3.54 perf-profile.children.cycles-pp.common_startup_64 2.54 +1.0 3.54 perf-profile.children.cycles-pp.cpu_startup_entry 2.54 +1.0 3.54 perf-profile.children.cycles-pp.do_idle 29.39 +1.1 30.49 perf-profile.children.cycles-pp.iomap_file_buffered_write 33.47 +1.2 34.68 perf-profile.children.cycles-pp.xfs_file_buffered_write 36.35 +1.3 37.66 perf-profile.children.cycles-pp.vfs_write 37.18 +1.4 38.54 perf-profile.children.cycles-pp.ksys_write 41.97 +1.5 43.49 perf-profile.children.cycles-pp.write 12.68 -2.9 9.78 ± 4% perf-profile.self.cycles-pp.osq_lock 2.26 -0.4 1.83 perf-profile.self.cycles-pp.rwsem_spin_on_owner 0.55 -0.1 0.44 perf-profile.self.cycles-pp.update_sg_lb_stats 0.12 ± 4% -0.0 0.11 ± 4% perf-profile.self.cycles-pp.idle_cpu 0.12 ± 5% -0.0 0.10 ± 3% perf-profile.self.cycles-pp.update_cfs_group 0.10 ± 4% -0.0 0.09 ± 4% perf-profile.self.cycles-pp.update_load_avg 0.22 ± 2% +0.0 0.24 ± 2% perf-profile.self.cycles-pp.cgroup_rstat_updated 0.28 +0.0 0.30 perf-profile.self.cycles-pp.__folio_batch_add_and_move 0.72 +0.0 0.74 perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe 
0.42 +0.0 0.44 perf-profile.self.cycles-pp.folio_unlock 0.05 +0.0 0.07 ± 5% perf-profile.self.cycles-pp.update_rq_clock 0.05 ± 7% +0.0 0.07 ± 5% perf-profile.self.cycles-pp.menu_select 0.54 +0.0 0.56 perf-profile.self.cycles-pp.xfs_file_buffered_write 0.46 +0.0 0.48 perf-profile.self.cycles-pp.xfs_iext_lookup_extent 0.43 ± 2% +0.0 0.45 perf-profile.self.cycles-pp.iomap_write_end 1.00 +0.0 1.03 perf-profile.self.cycles-pp.do_syscall_64 0.58 +0.0 0.61 perf-profile.self.cycles-pp.iomap_write_begin 0.95 +0.0 0.98 perf-profile.self.cycles-pp.iomap_write_iter 0.78 +0.0 0.81 perf-profile.self.cycles-pp.__filemap_get_folio 0.81 +0.0 0.84 perf-profile.self.cycles-pp.up_write 0.86 +0.0 0.88 perf-profile.self.cycles-pp.xas_load 0.76 +0.0 0.79 perf-profile.self.cycles-pp.iomap_file_buffered_write 0.99 +0.0 1.02 perf-profile.self.cycles-pp.vfs_read 1.06 +0.0 1.09 perf-profile.self.cycles-pp.xfs_buffered_write_iomap_begin 0.60 +0.0 0.64 perf-profile.self.cycles-pp.filemap_get_entry 1.06 +0.0 1.10 perf-profile.self.cycles-pp.fault_in_readable 0.65 +0.0 0.69 perf-profile.self.cycles-pp.__mod_memcg_lruvec_state 0.88 ± 2% +0.0 0.92 perf-profile.self.cycles-pp.entry_SYSCALL_64 1.20 +0.0 1.24 perf-profile.self.cycles-pp.vfs_write 1.10 +0.0 1.14 perf-profile.self.cycles-pp.iomap_iter_advance 0.74 +0.0 0.78 ± 2% perf-profile.self.cycles-pp.llseek 0.42 ± 2% +0.0 0.46 perf-profile.self.cycles-pp.task_mm_cid_work 1.64 +0.1 1.69 perf-profile.self.cycles-pp.copy_page_from_iter_atomic 0.00 +0.1 0.06 ± 9% perf-profile.self.cycles-pp.sched_balance_domains 1.62 ± 2% +0.1 1.69 perf-profile.self.cycles-pp.filemap_read 1.66 +0.1 1.75 ± 3% perf-profile.self.cycles-pp.fdget_pos 0.22 ± 6% +0.1 0.35 ± 11% perf-profile.self.cycles-pp.ktime_get 4.68 +0.1 4.82 perf-profile.self.cycles-pp.clear_bhb_loop 2.68 +0.2 2.84 perf-profile.self.cycles-pp.memset_orig 3.20 +0.2 3.39 perf-profile.self.cycles-pp.iomap_set_range_uptodate 4.84 +0.3 5.11 perf-profile.self.cycles-pp._copy_to_iter 0.83 +0.4 1.23 
perf-profile.self.cycles-pp.acpi_safe_halt *************************************************************************************************** lkp-skl-fpga01: 104 threads 2 sockets (Skylake) with 192G memory ========================================================================================= compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase: gcc-12/performance/x86_64-rhel-8.3/thread/100%/debian-12-x86_64-20240206.cgz/lkp-skl-fpga01/sched_yield/will-it-scale commit: 7266f0a6d3 ("fs/bcachefs: Fix __wait_on_freeing_inode() definition of waitqueue entry") 538d813df3 ("sched/eevdf: Reduce the computation frequency of avg_vruntime") 7266f0a6d3bb73f4 538d813df3945cbc9d6a90ba224 ---------------- --------------------------- %stddev %change %stddev \ | \ 1.977e+08 ± 6% +40.4% 2.776e+08 ± 15% cpuidle..time 44.46 -14.6% 37.96 ± 3% vmstat.cpu.us 54.34 +6.3 60.67 ± 2% mpstat.cpu.all.sys% 44.71 -6.5 38.16 ± 3% mpstat.cpu.all.usr% 309.97 ± 86% -89.9% 31.19 ±211% perf-sched.wait_time.avg.ms.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 1343 ± 93% -86.3% 183.97 ±215% perf-sched.wait_time.max.ms.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 65774130 -4.1% 63075951 will-it-scale.104.threads 632443 -4.1% 606499 will-it-scale.per_thread_ops 65774130 -4.1% 63075951 will-it-scale.workload 1.496e+10 -1.5% 1.474e+10 perf-stat.i.branch-instructions 0.73 +0.1 0.87 ± 4% perf-stat.i.branch-miss-rate% 1.098e+08 +16.7% 1.281e+08 ± 4% perf-stat.i.branch-misses 178.36 -2.2% 174.43 perf-stat.i.cpu-migrations 0.73 +0.1 0.87 ± 4% perf-stat.overall.branch-miss-rate% 340697 +4.5% 355878 perf-stat.overall.path-length 1.492e+10 -1.5% 1.469e+10 perf-stat.ps.branch-instructions 1.095e+08 +16.6% 1.277e+08 ± 4% perf-stat.ps.branch-misses 177.83 -2.2% 173.84 perf-stat.ps.cpu-migrations 22.46 -1.7 20.74 perf-profile.calltrace.cycles-pp.syscall_return_via_sysret.__sched_yield 15.97 -1.6 14.32 ± 2% 
perf-profile.calltrace.cycles-pp.entry_SYSRETQ_unsafe_stack.__sched_yield 2.12 ± 19% -0.9 1.21 ± 36% perf-profile.calltrace.cycles-pp.testcase 1.12 ± 11% -0.5 0.57 ± 45% perf-profile.calltrace.cycles-pp.perf_trace_sched_stat_runtime.update_curr.pick_task_fair.pick_next_task_fair.__schedule 3.43 -0.4 3.06 perf-profile.calltrace.cycles-pp.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 7.89 -0.2 7.65 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.__sched_yield 0.99 ± 2% -0.2 0.79 ± 3% perf-profile.calltrace.cycles-pp.raw_spin_rq_lock_nested.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.90 ± 2% -0.2 0.71 ± 3% perf-profile.calltrace.cycles-pp._raw_spin_lock.raw_spin_rq_lock_nested.do_sched_yield.__x64_sys_sched_yield.do_syscall_64 1.11 ± 2% -0.1 1.03 ± 2% perf-profile.calltrace.cycles-pp.yield_task_fair.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.58 ± 9% -0.1 0.53 perf-profile.calltrace.cycles-pp.__calc_delta.update_curr.pick_task_fair.pick_next_task_fair.__schedule 0.52 ± 2% +0.2 0.71 ± 8% perf-profile.calltrace.cycles-pp.update_curr_se.update_curr.pick_task_fair.pick_next_task_fair.__schedule 0.90 ± 2% +0.5 1.44 ± 13% perf-profile.calltrace.cycles-pp.update_curr_dl_se.update_curr.pick_task_fair.pick_next_task_fair.__schedule 0.00 +0.7 0.71 ± 16% perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 1.08 +0.7 1.80 ± 10% perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 5.86 ± 3% +0.8 6.62 ± 4% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_safe_stack.__sched_yield 97.15 +1.0 98.11 perf-profile.calltrace.cycles-pp.__sched_yield 1.58 +2.1 3.68 perf-profile.calltrace.cycles-pp.update_min_vruntime.update_curr.pick_task_fair.pick_next_task_fair.__schedule 26.96 +3.5 30.49 ± 2% 
perf-profile.calltrace.cycles-pp.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 23.27 +4.0 27.22 ± 3% perf-profile.calltrace.cycles-pp.schedule.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 22.59 +4.0 26.62 ± 3% perf-profile.calltrace.cycles-pp.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe 15.38 ± 2% +4.8 20.13 ± 3% perf-profile.calltrace.cycles-pp.pick_next_task_fair.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64 45.87 +4.8 50.67 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.__sched_yield 33.18 +5.6 38.77 ± 2% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield 13.14 +5.7 18.82 ± 3% perf-profile.calltrace.cycles-pp.pick_task_fair.pick_next_task_fair.__schedule.schedule.__x64_sys_sched_yield 9.46 +5.9 15.34 ± 3% perf-profile.calltrace.cycles-pp.update_curr.pick_task_fair.pick_next_task_fair.__schedule.schedule 22.62 -1.7 20.88 perf-profile.children.cycles-pp.syscall_return_via_sysret 16.99 -1.7 15.30 ± 2% perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack 2.12 ± 19% -0.9 1.21 ± 36% perf-profile.children.cycles-pp.testcase 1.12 ± 11% -0.5 0.61 ± 32% perf-profile.children.cycles-pp.perf_trace_sched_stat_runtime 3.61 -0.4 3.21 perf-profile.children.cycles-pp.do_sched_yield 1.59 -0.2 1.36 ± 3% perf-profile.children.cycles-pp._raw_spin_lock 1.02 ± 2% -0.2 0.82 ± 3% perf-profile.children.cycles-pp.raw_spin_rq_lock_nested 1.12 ± 2% -0.1 1.03 ± 2% perf-profile.children.cycles-pp.yield_task_fair 0.60 ± 9% -0.1 0.54 perf-profile.children.cycles-pp.__calc_delta 0.19 ± 3% -0.0 0.14 ± 6% perf-profile.children.cycles-pp.sched_update_worker 0.07 ± 5% +0.0 0.08 perf-profile.children.cycles-pp.task_tick_fair 0.11 ± 3% +0.0 0.13 ± 2% perf-profile.children.cycles-pp.sched_tick 0.24 ± 9% +0.0 0.28 ± 3% perf-profile.children.cycles-pp.update_process_times 0.09 ± 4% +0.0 0.13 ± 12% 
perf-profile.children.cycles-pp.arch_scale_cpu_capacity 0.28 ± 8% +0.0 0.32 ± 3% perf-profile.children.cycles-pp.tick_nohz_handler 0.34 ± 7% +0.0 0.38 ± 2% perf-profile.children.cycles-pp.__hrtimer_run_queues 0.40 ± 6% +0.0 0.44 ± 2% perf-profile.children.cycles-pp.__sysvec_apic_timer_interrupt 0.46 ± 5% +0.0 0.51 ± 2% perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt 0.39 ± 7% +0.0 0.44 ± 2% perf-profile.children.cycles-pp.hrtimer_interrupt 0.05 +0.0 0.10 ± 4% perf-profile.children.cycles-pp.sched_yield@plt 0.42 ± 6% +0.0 0.47 ± 2% perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt 1.11 +0.1 1.16 ± 3% perf-profile.children.cycles-pp.update_rq_clock_task 0.50 ± 2% +0.1 0.64 ± 9% perf-profile.children.cycles-pp.dl_scaled_delta_exec 0.57 +0.2 0.74 ± 8% perf-profile.children.cycles-pp.update_curr_se 9.44 +0.2 9.63 perf-profile.children.cycles-pp.entry_SYSCALL_64 3.20 ± 3% +0.4 3.57 ± 3% perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack 0.38 ± 4% +0.4 0.75 ± 16% perf-profile.children.cycles-pp.syscall_exit_to_user_mode_prepare 0.97 ± 2% +0.5 1.50 ± 12% perf-profile.children.cycles-pp.update_curr_dl_se 1.13 +0.7 1.85 ± 10% perf-profile.children.cycles-pp.syscall_exit_to_user_mode 97.52 +0.9 98.46 perf-profile.children.cycles-pp.__sched_yield 1.59 +2.1 3.70 perf-profile.children.cycles-pp.update_min_vruntime 27.20 +3.5 30.72 ± 2% perf-profile.children.cycles-pp.__x64_sys_sched_yield 23.29 +3.9 27.24 ± 3% perf-profile.children.cycles-pp.schedule 22.77 +4.0 26.78 ± 3% perf-profile.children.cycles-pp.__schedule 15.44 ± 2% +4.8 20.19 ± 3% perf-profile.children.cycles-pp.pick_next_task_fair 46.20 +4.8 50.97 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe 13.50 +5.6 19.09 ± 3% perf-profile.children.cycles-pp.pick_task_fair 33.34 +5.6 38.94 ± 2% perf-profile.children.cycles-pp.do_syscall_64 9.89 +5.7 15.60 ± 3% perf-profile.children.cycles-pp.update_curr 22.55 -1.7 20.84 perf-profile.self.cycles-pp.syscall_return_via_sysret 16.83 
-1.7 15.15 ± 2% perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack 1.84 ± 21% -0.9 0.97 ± 44% perf-profile.self.cycles-pp.testcase 13.04 -0.8 12.20 perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe 1.88 ± 7% -0.8 1.06 ± 8% perf-profile.self.cycles-pp.pick_next_task_fair 3.97 ± 2% -0.8 3.20 ± 6% perf-profile.self.cycles-pp.__schedule 2.82 ± 8% -0.6 2.18 ± 2% perf-profile.self.cycles-pp.__sched_yield 1.09 ± 11% -0.5 0.59 ± 32% perf-profile.self.cycles-pp.perf_trace_sched_stat_runtime 1.54 -0.2 1.30 ± 4% perf-profile.self.cycles-pp._raw_spin_lock 1.20 -0.1 1.09 ± 2% perf-profile.self.cycles-pp.do_sched_yield 0.52 ± 2% -0.1 0.45 ± 2% perf-profile.self.cycles-pp.schedule 1.00 ± 2% -0.1 0.94 ± 2% perf-profile.self.cycles-pp.yield_task_fair 0.58 ± 8% -0.1 0.53 perf-profile.self.cycles-pp.__calc_delta 0.55 -0.0 0.52 perf-profile.self.cycles-pp.entry_SYSCALL_64_safe_stack 0.17 ± 2% -0.0 0.14 ± 4% perf-profile.self.cycles-pp.sched_update_worker 0.25 ± 2% -0.0 0.23 ± 3% perf-profile.self.cycles-pp.__x64_sys_sched_yield 0.08 ± 5% +0.0 0.10 ± 6% perf-profile.self.cycles-pp.arch_scale_cpu_capacity 0.05 +0.0 0.10 ± 5% perf-profile.self.cycles-pp.sched_yield@plt 0.44 ± 2% +0.1 0.58 ± 9% perf-profile.self.cycles-pp.dl_scaled_delta_exec 0.50 ± 2% +0.2 0.68 ± 8% perf-profile.self.cycles-pp.update_curr_se 8.34 +0.2 8.56 perf-profile.self.cycles-pp.entry_SYSCALL_64 0.72 +0.4 1.08 ± 7% perf-profile.self.cycles-pp.syscall_exit_to_user_mode 0.34 ± 3% +0.4 0.70 ± 16% perf-profile.self.cycles-pp.syscall_exit_to_user_mode_prepare 0.49 ± 2% +0.4 0.88 ± 15% perf-profile.self.cycles-pp.update_curr_dl_se 4.30 ± 3% +1.4 5.66 ± 5% perf-profile.self.cycles-pp.do_syscall_64 1.47 ± 2% +2.2 3.68 perf-profile.self.cycles-pp.update_min_vruntime 4.05 +3.4 7.47 ± 4% perf-profile.self.cycles-pp.update_curr Disclaimer: Results have been estimated based on internal Intel analysis and are provided for informational purposes only. 
Any difference in system hardware or software design or configuration may affect actual performance. -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
The current code subtracts curr's contribution from avg_vruntime and
avg_load at runtime. Then, every time avg_vruntime() or
vruntime_eligible() is called, curr's contribution has to be added back
to avg_vruntime and avg_load before they can be used.
There is no need to subtract curr's load from avg_load at runtime.
Instead, it is sufficient to compute only the incremental change and
fold it into tot_vruntime whenever curr's runtime is updated. This
effectively reduces the number of computations involving curr.
To better reflect their roles, rename the original avg_vruntime and
avg_load to tot_vruntime and tot_load, respectively, since they hold
weighted totals rather than averages.
Signed-off-by: Xavier <xavier_qy@163.com>
---
Note:
To address the performance decline in V2, the V3 patch reduces the unnecessary
calculations of avg_vruntime.
kernel/sched/fair.c | 104 ++++++++++++++++++++++++-------------------
kernel/sched/sched.h | 3 +-
2 files changed, 59 insertions(+), 48 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6512258dc71..cbcdf8686fe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Which we track using:
*
* v0 := cfs_rq->min_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- * \Sum w_i := cfs_rq->avg_load
+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
+ * \Sum w_i := cfs_rq->tot_load
*
* Since min_vruntime is a monotonic increasing variable that closely tracks
* the per-task service, these deltas: (v_i - v), will be in the order of the
@@ -617,14 +617,28 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* As measured, the max (key * weight) value was ~44 bits for a kernel build.
*/
+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
+{
+ s64 tot_vruntime = cfs_rq->tot_vruntime;
+
+ /* sign flips effective floor / ceiling */
+ if (cfs_rq->tot_load) {
+ if (tot_vruntime < 0)
+ tot_vruntime -= (cfs_rq->tot_load - 1);
+ cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
+ } else {
+ cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
+ }
+}
+
static void
avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime += key * weight;
- cfs_rq->avg_load += weight;
+ cfs_rq->tot_vruntime += key * weight;
+ cfs_rq->tot_load += weight;
}
static void
@@ -633,17 +647,26 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime -= key * weight;
- cfs_rq->avg_load -= weight;
+ cfs_rq->tot_vruntime -= key * weight;
+ cfs_rq->tot_load -= weight;
+}
+
+static inline
+void tot_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
+{
+ struct sched_entity *curr = cfs_rq->curr;
+ unsigned long weight = scale_load_down(curr->load.weight);
+
+ cfs_rq->tot_vruntime += delta * weight;
}
static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void tot_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+ * v' = v + d ==> tot_vruntime' = tot_vruntime - d*tot_load
*/
- cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+ cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
}
/*
@@ -652,25 +675,9 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
*/
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
- struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
-
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
-
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
-
- if (load) {
- /* sign flips effective floor / ceiling */
- if (avg < 0)
- avg -= (load - 1);
- avg = div_s64(avg, load);
- }
+ avg_vruntime_update(cfs_rq);
- return cfs_rq->min_vruntime + avg;
+ return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
}
/*
@@ -725,18 +732,8 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
- struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
-
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
-
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
-
- return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+ return cfs_rq->tot_vruntime >=
+ (s64)(vruntime - cfs_rq->min_vruntime) * (s64)cfs_rq->tot_load;
}
int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -752,7 +749,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
*/
s64 delta = (s64)(vruntime - min_vruntime);
if (delta > 0) {
- avg_vruntime_update(cfs_rq, delta);
+ tot_vruntime_update_for_minv(cfs_rq, delta);
min_vruntime = vruntime;
}
return min_vruntime;
@@ -851,7 +848,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
*/
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- avg_vruntime_add(cfs_rq, se);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -862,7 +858,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
- avg_vruntime_sub(cfs_rq, se);
}
struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1219,6 +1214,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
struct rq *rq = rq_of(cfs_rq);
s64 delta_exec;
bool resched;
+ s64 vdelta_exec;
if (unlikely(!curr))
return;
@@ -1227,8 +1223,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (unlikely(delta_exec <= 0))
return;
- curr->vruntime += calc_delta_fair(delta_exec, curr);
+ vdelta_exec = calc_delta_fair(delta_exec, curr);
+ curr->vruntime += vdelta_exec;
resched = update_deadline(cfs_rq, curr);
+
+ tot_vruntime_update_for_curr(cfs_rq, vdelta_exec);
update_min_vruntime(cfs_rq);
if (entity_is_task(curr)) {
@@ -3883,6 +3882,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
avruntime = avg_vruntime(cfs_rq);
if (!curr)
__dequeue_entity(cfs_rq, se);
+
+ avg_vruntime_sub(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);
@@ -3913,6 +3914,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
if (!curr)
__enqueue_entity(cfs_rq, se);
+ avg_vruntime_add(cfs_rq, se);
+
/*
* The entity's vruntime has been adjusted, so let's check
* whether the rq-wide min_vruntime needs updated too. Since
@@ -5281,7 +5284,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* EEVDF: placement strategy #1 / #2
*/
if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
- struct sched_entity *curr = cfs_rq->curr;
unsigned long load;
lag = se->vlag;
@@ -5338,9 +5340,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* vl_i = (W + w_i)*vl'_i / W
*/
- load = cfs_rq->avg_load;
- if (curr && curr->on_rq)
- load += scale_load_down(curr->load.weight);
+ load = cfs_rq->tot_load;
lag *= load + scale_load_down(se->load.weight);
if (WARN_ON_ONCE(!load))
@@ -5427,6 +5427,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_stats_enqueue_fair(cfs_rq, se, flags);
if (!curr)
__enqueue_entity(cfs_rq, se);
+
+ avg_vruntime_add(cfs_rq, se);
se->on_rq = 1;
if (cfs_rq->nr_running == 1) {
@@ -5530,6 +5532,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
+
+ avg_vruntime_sub(cfs_rq, se);
se->on_rq = 0;
account_entity_dequeue(cfs_rq, se);
@@ -6927,11 +6931,17 @@ requeue_delayed_entity(struct sched_entity *se)
cfs_rq->nr_running--;
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
+ avg_vruntime_sub(cfs_rq, se);
+
se->vlag = 0;
place_entity(cfs_rq, se, 0);
+
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se);
+ avg_vruntime_add(cfs_rq, se);
cfs_rq->nr_running++;
+
+ update_min_vruntime(cfs_rq);
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5f3de82ec9c..b5b4062db3e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,8 +650,9 @@ struct cfs_rq {
unsigned int idle_nr_running; /* SCHED_IDLE */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
+ s64 tot_vruntime;
s64 avg_vruntime;
- u64 avg_load;
+ u64 tot_load;
u64 min_vruntime;
#ifdef CONFIG_SCHED_CORE
--
2.45.2
Hello, kernel test robot noticed a 3.5% regression of reaim.jobs_per_min on: commit: e14321ce0d4a9c7a2894b119840a9d58ec4cd25d ("[PATCH v3] sched/eevdf: Remove the consideration of the current task's time in vruntime_eligible() and avg_vruntime()") url: https://github.com/intel-lab-lkp/linux/commits/Xavier/sched-eevdf-Remove-the-consideration-of-the-current-task-s-time-in-vruntime_eligible-and-avg_vruntime/20241018-225521 base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git af0c8b2bf67b25756f27644936e74fd9a6273bd2 patch link: https://lore.kernel.org/all/20241018145216.1021494-1-xavier_qy@163.com/ patch subject: [PATCH v3] sched/eevdf: Remove the consideration of the current task's time in vruntime_eligible() and avg_vruntime() testcase: reaim config: x86_64-rhel-8.3 compiler: gcc-12 test machine: 48 threads 2 sockets Intel(R) Xeon(R) CPU E5-2697 v2 @ 2.70GHz (Ivy Bridge-EP) with 64G memory parameters: runtime: 300s nr_task: 100% test: custom cpufreq_governor: performance If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202410251525.9f85854d-oliver.sang@intel.com Details are as below: --------------------------------------------------------------------------------------------------> The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20241025/202410251525.9f85854d-oliver.sang@intel.com ========================================================================================= compiler/cpufreq_governor/kconfig/nr_task/rootfs/runtime/tbox_group/test/testcase: gcc-12/performance/x86_64-rhel-8.3/100%/debian-12-x86_64-20240206.cgz/300s/lkp-ivb-2ep2/custom/reaim commit: af0c8b2bf6 ("sched: Split scheduler and execution contexts") e14321ce0d ("sched/eevdf: Remove the consideration of the current task's time in vruntime_eligible() and avg_vruntime()") af0c8b2bf67b2575 e14321ce0d4a9c7a2894b119840 ---------------- --------------------------- %stddev %change %stddev \ | \ 0.14 +0.0 0.19 mpstat.cpu.all.soft% 111.00 ± 80% -89.9% 11.17 ± 23% mpstat.max_utilization.seconds 5858 ± 4% -8.9% 5335 ± 4% numa-meminfo.node1.KernelStack 25970 ± 53% -40.2% 15518 ± 51% numa-meminfo.node1.Mapped 5859 ± 4% -8.9% 5337 ± 4% numa-vmstat.node1.nr_kernel_stack 6643 ± 52% -40.9% 3927 ± 50% numa-vmstat.node1.nr_mapped 22526 -9.4% 20413 vmstat.system.cs 27118 -2.9% 26328 vmstat.system.in 224657 ± 2% -9.1% 204143 meminfo.Active 224625 ± 2% -9.1% 204111 meminfo.Active(anon) 239022 ± 2% -9.0% 217445 meminfo.Shmem 56114 ± 2% -9.0% 51064 proc-vmstat.nr_active_anon 12421 -2.5% 12115 proc-vmstat.nr_mapped 59755 ± 2% -9.0% 54398 proc-vmstat.nr_shmem 56114 ± 2% -9.0% 51064 proc-vmstat.nr_zone_active_anon 69086758 -1.1% 68296855 proc-vmstat.numa_hit 69043741 -1.2% 68247974 proc-vmstat.numa_local 91813 -7.1% 85252 proc-vmstat.pgactivate 71568239 -1.1% 70771661 proc-vmstat.pgalloc_normal 84550200 -1.0% 
83664581 proc-vmstat.pgfault 71455511 -1.1% 70664205 proc-vmstat.pgfree 51.42 -1.0% 50.90 reaim.child_utime 200350 -3.5% 193411 reaim.jobs_per_min 4173 -3.5% 4029 reaim.jobs_per_min_child 208595 -2.4% 203678 reaim.max_jobs_per_min 1.37 +3.6% 1.42 reaim.parent_time 1.54 +48.0% 2.28 reaim.std_dev_percent 0.02 ± 2% +46.6% 0.03 reaim.std_dev_time 675635 -67.8% 217563 reaim.time.involuntary_context_switches 83620119 -1.1% 82730368 reaim.time.minor_page_faults 1789 -2.4% 1746 reaim.time.percent_of_cpu_this_job_got 829.97 -2.1% 812.88 reaim.time.system_time 4578 -2.1% 4481 reaim.time.user_time 3394885 -2.5% 3311673 reaim.time.voluntary_context_switches 427200 -1.1% 422400 reaim.workload 1.73 -2.8% 1.68 perf-stat.i.MPKI 3.963e+09 +8.3% 4.291e+09 perf-stat.i.branch-instructions 2.89 +0.1 3.01 perf-stat.i.branch-miss-rate% 92238582 +7.8% 99393054 perf-stat.i.branch-misses 5.351e+08 +7.9% 5.772e+08 perf-stat.i.cache-references 48004 +2.9% 49407 perf-stat.i.cpu-clock 5.535e+10 +7.4% 5.946e+10 perf-stat.i.cpu-cycles 4872 -1.6% 4794 perf-stat.i.cpu-migrations 2440 -1.7% 2398 perf-stat.i.cycles-between-cache-misses 2.87e+10 +8.3% 3.108e+10 perf-stat.i.instructions 0.49 +1.3% 0.50 perf-stat.i.ipc 21.46 +5.5% 22.63 perf-stat.i.major-faults 11.61 -15.1% 9.86 perf-stat.i.metric.K/sec 276192 +9.0% 301016 perf-stat.i.minor-faults 276214 +9.0% 301038 perf-stat.i.page-faults 48004 +2.9% 49407 perf-stat.i.task-clock 5644916 ± 18% +35162.3% 1.991e+09 ± 17% sched_debug.cfs_rq:/.avg_vruntime.avg 17097981 ± 48% +15093.3% 2.598e+09 ± 16% sched_debug.cfs_rq:/.avg_vruntime.max 2702756 ± 10% +45989.8% 1.246e+09 ± 28% sched_debug.cfs_rq:/.avg_vruntime.min 2873896 ± 37% +9275.7% 2.694e+08 ± 22% sched_debug.cfs_rq:/.avg_vruntime.stddev 40604 ± 76% +1.3e+05% 53761749 ± 49% sched_debug.cfs_rq:/.left_deadline.avg 1438807 ± 57% +70833.0% 1.021e+09 ± 25% sched_debug.cfs_rq:/.left_deadline.max 224842 ± 61% +90999.7% 2.048e+08 ± 25% sched_debug.cfs_rq:/.left_deadline.stddev 40603 ± 76% +1.3e+05% 53761750 ± 
49% sched_debug.cfs_rq:/.left_vruntime.avg 1438775 ± 57% +70834.6% 1.021e+09 ± 25% sched_debug.cfs_rq:/.left_vruntime.max 224837 ± 61% +91001.7% 2.048e+08 ± 25% sched_debug.cfs_rq:/.left_vruntime.stddev 30025 ± 19% +54.8% 46483 ± 23% sched_debug.cfs_rq:/.load.avg 95346 ± 38% +72.5% 164455 ± 16% sched_debug.cfs_rq:/.load.stddev 5644917 ± 18% +35161.5% 1.99e+09 ± 17% sched_debug.cfs_rq:/.min_vruntime.avg 17097981 ± 48% +15092.9% 2.598e+09 ± 16% sched_debug.cfs_rq:/.min_vruntime.max 2702756 ± 10% +45988.6% 1.246e+09 ± 28% sched_debug.cfs_rq:/.min_vruntime.min 2873897 ± 37% +9275.6% 2.694e+08 ± 22% sched_debug.cfs_rq:/.min_vruntime.stddev 0.25 ± 11% +30.7% 0.33 ± 10% sched_debug.cfs_rq:/.nr_running.stddev 40603 ± 76% +1.3e+05% 53761750 ± 49% sched_debug.cfs_rq:/.right_vruntime.avg 1438775 ± 57% +70834.6% 1.021e+09 ± 25% sched_debug.cfs_rq:/.right_vruntime.max 224837 ± 61% +91001.7% 2.048e+08 ± 25% sched_debug.cfs_rq:/.right_vruntime.stddev 0.00 ±223% +21421.5% 0.23 ± 66% sched_debug.cfs_rq:/.spread.avg 0.05 ±223% +20478.3% 10.64 ± 64% sched_debug.cfs_rq:/.spread.max 0.01 ±223% +20485.4% 1.52 ± 64% sched_debug.cfs_rq:/.spread.stddev 93.49 ± 55% -56.5% 40.71 ± 74% sched_debug.cfs_rq:/.util_avg.min 7.50 ± 10% -2.8 4.65 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe 7.48 ± 10% -2.8 4.64 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe 8.97 ± 2% -1.4 7.59 perf-profile.calltrace.cycles-pp.common_startup_64 8.80 ± 2% -1.4 7.42 perf-profile.calltrace.cycles-pp.cpu_startup_entry.start_secondary.common_startup_64 8.80 ± 2% -1.4 7.42 perf-profile.calltrace.cycles-pp.start_secondary.common_startup_64 8.78 ± 2% -1.4 7.42 perf-profile.calltrace.cycles-pp.do_idle.cpu_startup_entry.start_secondary.common_startup_64 8.54 ± 2% -1.3 7.22 perf-profile.calltrace.cycles-pp.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary.common_startup_64 8.03 ± 2% -1.2 6.86 
perf-profile.calltrace.cycles-pp.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary 7.96 ± 2% -1.1 6.81 perf-profile.calltrace.cycles-pp.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry 3.58 ± 4% -0.7 2.88 perf-profile.calltrace.cycles-pp.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe 3.55 ± 4% -0.7 2.86 perf-profile.calltrace.cycles-pp.__x64_sys_exit_group.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe 3.55 ± 4% -0.7 2.86 perf-profile.calltrace.cycles-pp.do_group_exit.__x64_sys_exit_group.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe 3.55 ± 4% -0.7 2.86 perf-profile.calltrace.cycles-pp.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call.do_syscall_64 6.16 ± 2% -0.6 5.53 perf-profile.calltrace.cycles-pp.intel_idle.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle 2.31 ± 7% -0.6 1.69 perf-profile.calltrace.cycles-pp.asm_exc_page_fault 2.23 ± 7% -0.6 1.62 perf-profile.calltrace.cycles-pp.exc_page_fault.asm_exc_page_fault 2.18 ± 7% -0.6 1.59 perf-profile.calltrace.cycles-pp.do_user_addr_fault.exc_page_fault.asm_exc_page_fault 2.84 ± 4% -0.5 2.31 ± 2% perf-profile.calltrace.cycles-pp.exit_mm.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call 2.83 ± 4% -0.5 2.31 ± 2% perf-profile.calltrace.cycles-pp.mmput.exit_mm.do_exit.do_group_exit.__x64_sys_exit_group 2.82 ± 4% -0.5 2.30 ± 2% perf-profile.calltrace.cycles-pp.exit_mmap.mmput.exit_mm.do_exit.do_group_exit 1.87 ± 7% -0.5 1.36 perf-profile.calltrace.cycles-pp.handle_mm_fault.do_user_addr_fault.exc_page_fault.asm_exc_page_fault 1.63 ± 4% -0.5 1.14 ± 2% perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle 1.73 ± 7% -0.5 1.26 perf-profile.calltrace.cycles-pp.__handle_mm_fault.handle_mm_fault.do_user_addr_fault.exc_page_fault.asm_exc_page_fault 1.54 ± 4% -0.5 1.08 ± 2% 
perf-profile.calltrace.cycles-pp.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call 1.00 ± 4% -0.3 0.70 ± 2% perf-profile.calltrace.cycles-pp.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter 0.98 ± 4% -0.3 0.68 ± 2% perf-profile.calltrace.cycles-pp.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state 1.21 ± 3% -0.3 0.96 perf-profile.calltrace.cycles-pp.setlocale 1.24 ± 4% -0.2 1.01 perf-profile.calltrace.cycles-pp.unmap_vmas.exit_mmap.mmput.exit_mm.do_exit 0.78 ± 5% -0.2 0.55 ± 2% perf-profile.calltrace.cycles-pp.__hrtimer_run_queues.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt 0.87 ± 7% -0.2 0.65 ± 3% perf-profile.calltrace.cycles-pp.do_fault.__handle_mm_fault.handle_mm_fault.do_user_addr_fault.exc_page_fault 1.17 ± 4% -0.2 0.95 perf-profile.calltrace.cycles-pp.unmap_page_range.unmap_vmas.exit_mmap.mmput.exit_mm 1.14 ± 4% -0.2 0.92 perf-profile.calltrace.cycles-pp.zap_pmd_range.unmap_page_range.unmap_vmas.exit_mmap.mmput 1.11 ± 4% -0.2 0.90 perf-profile.calltrace.cycles-pp.zap_pte_range.zap_pmd_range.unmap_page_range.unmap_vmas.exit_mmap 0.77 ± 7% -0.2 0.57 ± 3% perf-profile.calltrace.cycles-pp.do_read_fault.do_fault.__handle_mm_fault.handle_mm_fault.do_user_addr_fault 0.73 ± 7% -0.2 0.54 ± 2% perf-profile.calltrace.cycles-pp.filemap_map_pages.do_read_fault.do_fault.__handle_mm_fault.handle_mm_fault 1.56 -0.2 1.37 perf-profile.calltrace.cycles-pp.__x64_sys_execve.do_syscall_64.entry_SYSCALL_64_after_hwframe.execve 1.56 -0.2 1.37 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.execve 1.56 -0.2 1.37 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.execve 1.56 -0.2 1.38 perf-profile.calltrace.cycles-pp.execve 0.92 ± 3% -0.2 0.74 ± 2% 
perf-profile.calltrace.cycles-pp.zap_present_ptes.zap_pte_range.zap_pmd_range.unmap_page_range.unmap_vmas 1.55 -0.2 1.37 perf-profile.calltrace.cycles-pp.do_execveat_common.__x64_sys_execve.do_syscall_64.entry_SYSCALL_64_after_hwframe.execve 1.11 -0.1 0.98 perf-profile.calltrace.cycles-pp.bprm_execve.do_execveat_common.__x64_sys_execve.do_syscall_64.entry_SYSCALL_64_after_hwframe 1.00 ± 2% -0.1 0.89 perf-profile.calltrace.cycles-pp.search_binary_handler.exec_binprm.bprm_execve.do_execveat_common.__x64_sys_execve 1.00 ± 2% -0.1 0.89 perf-profile.calltrace.cycles-pp.exec_binprm.bprm_execve.do_execveat_common.__x64_sys_execve.do_syscall_64 0.97 ± 2% -0.1 0.86 perf-profile.calltrace.cycles-pp.load_elf_binary.search_binary_handler.exec_binprm.bprm_execve.do_execveat_common 0.63 ± 6% -0.1 0.53 ± 3% perf-profile.calltrace.cycles-pp.free_pgtables.exit_mmap.mmput.exit_mm.do_exit 0.80 ± 4% +0.0 0.85 ± 2% perf-profile.calltrace.cycles-pp.brk 1.92 ± 2% +0.1 2.00 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe._Fork 1.92 ± 2% +0.1 2.00 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe._Fork 1.91 ± 2% +0.1 2.00 perf-profile.calltrace.cycles-pp.__do_sys_clone.do_syscall_64.entry_SYSCALL_64_after_hwframe._Fork 1.91 ± 2% +0.1 2.00 perf-profile.calltrace.cycles-pp.kernel_clone.__do_sys_clone.do_syscall_64.entry_SYSCALL_64_after_hwframe._Fork 2.18 ± 2% +0.1 2.28 perf-profile.calltrace.cycles-pp._Fork 0.00 +0.5 0.52 ± 2% perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.sieve.runloop.multiuser.main 0.47 ± 79% +0.7 1.15 ± 14% perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.scan_positives.dcache_readdir.iterate_dir 0.73 ± 56% +0.7 1.43 ± 12% perf-profile.calltrace.cycles-pp.dcache_readdir.iterate_dir.__x64_sys_getdents64.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.73 ± 56% +0.7 1.43 ± 12% 
perf-profile.calltrace.cycles-pp.__x64_sys_getdents64.do_syscall_64.entry_SYSCALL_64_after_hwframe.getdents64 0.73 ± 56% +0.7 1.43 ± 12% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.getdents64 0.73 ± 56% +0.7 1.43 ± 12% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.getdents64 0.73 ± 56% +0.7 1.43 ± 12% perf-profile.calltrace.cycles-pp.iterate_dir.__x64_sys_getdents64.do_syscall_64.entry_SYSCALL_64_after_hwframe.getdents64 0.73 ± 56% +0.7 1.44 ± 12% perf-profile.calltrace.cycles-pp.getdents64 0.55 ± 78% +0.7 1.27 ± 13% perf-profile.calltrace.cycles-pp._raw_spin_lock.scan_positives.dcache_readdir.iterate_dir.__x64_sys_getdents64 0.58 ± 78% +0.8 1.34 ± 13% perf-profile.calltrace.cycles-pp.scan_positives.dcache_readdir.iterate_dir.__x64_sys_getdents64.do_syscall_64 66.34 +5.5 71.89 perf-profile.calltrace.cycles-pp.sieve.runloop.multiuser.main 69.74 +5.8 75.52 perf-profile.calltrace.cycles-pp.main 69.74 +5.8 75.52 perf-profile.calltrace.cycles-pp.multiuser.main 69.73 +5.8 75.51 perf-profile.calltrace.cycles-pp.runloop.multiuser.main 16.72 ± 4% -2.5 14.23 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe 16.67 ± 4% -2.5 14.18 perf-profile.children.cycles-pp.do_syscall_64 8.97 ± 2% -1.4 7.59 perf-profile.children.cycles-pp.common_startup_64 8.97 ± 2% -1.4 7.59 perf-profile.children.cycles-pp.cpu_startup_entry 8.97 ± 2% -1.4 7.59 perf-profile.children.cycles-pp.do_idle 8.80 ± 2% -1.4 7.42 perf-profile.children.cycles-pp.start_secondary 8.72 ± 2% -1.3 7.38 perf-profile.children.cycles-pp.cpuidle_idle_call 8.20 ± 2% -1.2 7.02 perf-profile.children.cycles-pp.cpuidle_enter 8.18 ± 2% -1.2 7.00 perf-profile.children.cycles-pp.cpuidle_enter_state 3.73 ± 4% -0.7 3.02 perf-profile.children.cycles-pp.x64_sys_call 3.64 ± 4% -0.7 2.95 perf-profile.children.cycles-pp.__x64_sys_exit_group 3.64 ± 4% -0.7 2.95 perf-profile.children.cycles-pp.do_exit 3.64 ± 4% -0.7 2.95 perf-profile.children.cycles-pp.do_group_exit 4.03 ± 4% 
-0.7 3.35 perf-profile.children.cycles-pp.asm_exc_page_fault 3.77 ± 4% -0.6 3.12 perf-profile.children.cycles-pp.exc_page_fault 2.60 -0.6 1.96 perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt 6.29 ± 2% -0.6 5.66 ± 2% perf-profile.children.cycles-pp.intel_idle 3.70 ± 4% -0.6 3.07 perf-profile.children.cycles-pp.do_user_addr_fault 2.44 -0.6 1.85 perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt 3.25 ± 4% -0.6 2.68 perf-profile.children.cycles-pp.handle_mm_fault 2.86 ± 4% -0.5 2.32 ± 2% perf-profile.children.cycles-pp.exit_mm 2.84 ± 4% -0.5 2.31 ± 2% perf-profile.children.cycles-pp.mmput 3.00 ± 4% -0.5 2.48 perf-profile.children.cycles-pp.__handle_mm_fault 2.83 ± 4% -0.5 2.30 ± 2% perf-profile.children.cycles-pp.exit_mmap 2.88 ± 5% -0.5 2.42 perf-profile.children.cycles-pp.kernel_clone 1.81 -0.4 1.38 perf-profile.children.cycles-pp.__x64_sys_execve 1.61 -0.4 1.18 perf-profile.children.cycles-pp.__sysvec_apic_timer_interrupt 1.81 ± 2% -0.4 1.38 perf-profile.children.cycles-pp.do_execveat_common 1.58 -0.4 1.16 perf-profile.children.cycles-pp.hrtimer_interrupt 2.37 ± 6% -0.4 2.00 perf-profile.children.cycles-pp.__do_sys_clone 2.38 ± 6% -0.4 2.02 perf-profile.children.cycles-pp.copy_process 1.58 ± 3% -0.3 1.24 ± 2% perf-profile.children.cycles-pp.do_sys_openat2 1.32 -0.3 0.98 perf-profile.children.cycles-pp.__hrtimer_run_queues 1.56 ± 2% -0.3 1.23 ± 2% perf-profile.children.cycles-pp.vm_mmap_pgoff 1.48 ± 3% -0.3 1.16 ± 2% perf-profile.children.cycles-pp.do_filp_open 1.48 ± 2% -0.3 1.17 ± 2% perf-profile.children.cycles-pp.do_mmap 1.44 ± 3% -0.3 1.14 ± 2% perf-profile.children.cycles-pp.path_openat 1.30 ± 2% -0.3 1.00 perf-profile.children.cycles-pp.bprm_execve 1.78 ± 7% -0.3 1.49 perf-profile.children.cycles-pp.dup_mm 1.32 ± 2% -0.3 1.04 perf-profile.children.cycles-pp.mmap_region 1.40 ± 5% -0.3 1.14 ± 2% perf-profile.children.cycles-pp.do_fault 1.16 ± 2% -0.3 0.89 perf-profile.children.cycles-pp.search_binary_handler 1.17 ± 4% -0.3 0.91 ± 2% 
perf-profile.children.cycles-pp.__schedule 1.16 ± 2% -0.3 0.90 perf-profile.children.cycles-pp.exec_binprm 1.22 ± 3% -0.3 0.96 perf-profile.children.cycles-pp.setlocale 1.12 ± 2% -0.3 0.86 perf-profile.children.cycles-pp.load_elf_binary 1.55 ± 7% -0.3 1.30 perf-profile.children.cycles-pp.dup_mmap 1.35 ± 4% -0.2 1.10 ± 2% perf-profile.children.cycles-pp.unmap_vmas 1.05 ± 5% -0.2 0.81 ± 3% perf-profile.children.cycles-pp.__x64_sys_openat 1.27 ± 4% -0.2 1.04 ± 2% perf-profile.children.cycles-pp.unmap_page_range 1.22 ± 4% -0.2 1.00 ± 2% perf-profile.children.cycles-pp.zap_pmd_range 1.22 ± 5% -0.2 1.00 ± 2% perf-profile.children.cycles-pp.do_read_fault 1.19 ± 4% -0.2 0.97 ± 2% perf-profile.children.cycles-pp.zap_pte_range 0.92 ± 2% -0.2 0.71 ± 2% perf-profile.children.cycles-pp.handle_softirqs 1.17 ± 5% -0.2 0.96 perf-profile.children.cycles-pp.filemap_map_pages 1.01 ± 2% -0.2 0.79 ± 2% perf-profile.children.cycles-pp.ksys_mmap_pgoff 0.90 ± 2% -0.2 0.69 ± 2% perf-profile.children.cycles-pp.tick_nohz_handler 0.64 ± 5% -0.2 0.44 ± 2% perf-profile.children.cycles-pp.ret_from_fork_asm 0.58 ± 6% -0.2 0.38 ± 2% perf-profile.children.cycles-pp.ret_from_fork 0.89 ± 2% -0.2 0.69 ± 2% perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack 0.99 ± 3% -0.2 0.80 ± 2% perf-profile.children.cycles-pp.zap_present_ptes 0.35 ± 5% -0.2 0.16 ± 3% perf-profile.children.cycles-pp.smpboot_thread_fn 0.42 ± 6% -0.2 0.24 ± 3% perf-profile.children.cycles-pp.kthread 1.56 -0.2 1.38 perf-profile.children.cycles-pp.execve 0.80 ± 2% -0.2 0.61 ± 2% perf-profile.children.cycles-pp.update_process_times 0.80 ± 3% -0.2 0.63 ± 3% perf-profile.children.cycles-pp.schedule 0.27 ± 5% -0.2 0.12 ± 4% perf-profile.children.cycles-pp.run_ksoftirqd 0.74 ± 6% -0.2 0.59 ± 3% perf-profile.children.cycles-pp.__alloc_pages_noprof 0.79 ± 6% -0.2 0.64 ± 2% perf-profile.children.cycles-pp.alloc_pages_mpol_noprof 0.70 ± 3% -0.2 0.54 ± 4% perf-profile.children.cycles-pp.link_path_walk 0.86 ± 5% -0.1 0.71 ± 2% 
perf-profile.children.cycles-pp.kmem_cache_free 0.62 ± 6% -0.1 0.47 perf-profile.children.cycles-pp.copy_page_range 0.86 ± 3% -0.1 0.72 perf-profile.children.cycles-pp.kmem_cache_alloc_noprof 0.62 ± 2% -0.1 0.48 ± 2% perf-profile.children.cycles-pp.elf_load 0.60 ± 6% -0.1 0.46 perf-profile.children.cycles-pp.copy_p4d_range 0.73 ± 4% -0.1 0.59 ± 3% perf-profile.children.cycles-pp.tlb_finish_mmu 0.67 ± 5% -0.1 0.54 ± 3% perf-profile.children.cycles-pp.__irqentry_text_end 0.58 ± 4% -0.1 0.45 ± 4% perf-profile.children.cycles-pp.pick_next_task_fair 0.60 ± 6% -0.1 0.47 ± 3% perf-profile.children.cycles-pp.__open64_nocancel 0.41 ± 5% -0.1 0.28 ± 5% perf-profile.children.cycles-pp.menu_select 0.58 ± 6% -0.1 0.45 ± 3% perf-profile.children.cycles-pp.get_page_from_freelist 0.78 ± 5% -0.1 0.66 ± 2% perf-profile.children.cycles-pp.__do_sys_brk 0.62 ± 3% -0.1 0.49 ± 2% perf-profile.children.cycles-pp.__do_sys_wait4 0.60 ± 3% -0.1 0.48 ± 2% perf-profile.children.cycles-pp.do_wait 0.61 ± 3% -0.1 0.49 ± 2% perf-profile.children.cycles-pp.kernel_wait4 0.46 ± 7% -0.1 0.34 ± 2% perf-profile.children.cycles-pp.copy_pte_range 0.55 ± 4% -0.1 0.43 ± 3% perf-profile.children.cycles-pp.sched_balance_rq 0.69 ± 5% -0.1 0.58 ± 3% perf-profile.children.cycles-pp.free_pgtables 0.52 ± 3% -0.1 0.41 ± 3% perf-profile.children.cycles-pp.rcu_core 0.61 ± 3% -0.1 0.50 ± 4% perf-profile.children.cycles-pp.__tlb_batch_free_encoded_pages 0.61 ± 3% -0.1 0.50 ± 4% perf-profile.children.cycles-pp.free_pages_and_swap_cache 0.59 ± 5% -0.1 0.48 perf-profile.children.cycles-pp.__memcg_slab_post_alloc_hook 0.49 ± 4% -0.1 0.38 ± 4% perf-profile.children.cycles-pp.walk_component 0.62 ± 5% -0.1 0.51 ± 4% perf-profile.children.cycles-pp.do_anonymous_page 0.41 ± 3% -0.1 0.31 ± 2% perf-profile.children.cycles-pp.__split_vma 0.54 ± 4% -0.1 0.44 ± 3% perf-profile.children.cycles-pp.perf_event_mmap 0.46 ± 3% -0.1 0.36 ± 2% perf-profile.children.cycles-pp.sched_tick 0.54 ± 2% -0.1 0.44 
perf-profile.children.cycles-pp.__mmap 0.54 ± 2% -0.1 0.44 perf-profile.children.cycles-pp.__x64_sys_creat 0.55 ± 6% -0.1 0.45 ± 2% perf-profile.children.cycles-pp.next_uptodate_folio 0.50 ± 4% -0.1 0.41 ± 4% perf-profile.children.cycles-pp.sched_balance_newidle 0.61 ± 4% -0.1 0.52 ± 3% perf-profile.children.cycles-pp.__irq_exit_rcu 0.52 ± 5% -0.1 0.42 ± 5% perf-profile.children.cycles-pp.perf_event_mmap_event 0.42 ± 4% -0.1 0.33 ± 2% perf-profile.children.cycles-pp.syscall_return_via_sysret 0.63 ± 3% -0.1 0.54 ± 4% perf-profile.children.cycles-pp.ksys_write 0.45 ± 3% -0.1 0.36 ± 3% perf-profile.children.cycles-pp.rcu_do_batch 0.50 ± 3% -0.1 0.41 ± 4% perf-profile.children.cycles-pp.do_vmi_align_munmap 0.38 ± 6% -0.1 0.30 ± 3% perf-profile.children.cycles-pp.clear_page_erms 0.61 ± 4% -0.1 0.52 ± 5% perf-profile.children.cycles-pp.vfs_write 0.43 ± 4% -0.1 0.35 ± 4% perf-profile.children.cycles-pp.entry_SYSCALL_64 0.44 ± 4% -0.1 0.36 ± 3% perf-profile.children.cycles-pp.folios_put_refs 0.28 ± 6% -0.1 0.20 ± 3% perf-profile.children.cycles-pp.copy_present_ptes 0.35 ± 4% -0.1 0.27 ± 3% perf-profile.children.cycles-pp.syscall_exit_to_user_mode 0.41 ± 6% -0.1 0.33 ± 4% perf-profile.children.cycles-pp.__memcg_slab_free_hook 0.37 ± 4% -0.1 0.30 ± 3% perf-profile.children.cycles-pp.mas_store_prealloc 0.30 ± 4% -0.1 0.23 ± 3% perf-profile.children.cycles-pp.load_elf_interp 0.38 ± 2% -0.1 0.30 ± 5% perf-profile.children.cycles-pp.ksys_read 0.41 ± 4% -0.1 0.34 ± 5% perf-profile.children.cycles-pp.perf_iterate_sb 0.37 -0.1 0.30 ± 5% perf-profile.children.cycles-pp.vfs_read 0.55 ± 3% -0.1 0.48 perf-profile.children.cycles-pp.wp_page_copy 0.44 ± 6% -0.1 0.37 ± 3% perf-profile.children.cycles-pp.vma_alloc_folio_noprof 0.22 ± 9% -0.1 0.15 ± 8% perf-profile.children.cycles-pp.tick_nohz_get_sleep_length 0.30 ± 4% -0.1 0.23 perf-profile.children.cycles-pp.vms_gather_munmap_vmas 0.43 ± 6% -0.1 0.36 ± 3% perf-profile.children.cycles-pp.do_brk_flags 0.27 ± 12% -0.1 0.20 ± 5% 
perf-profile.children.cycles-pp.d_alloc_parallel 0.35 ± 3% -0.1 0.28 ± 5% perf-profile.children.cycles-pp.sched_balance_find_src_group 0.34 ± 7% -0.1 0.28 ± 3% perf-profile.children.cycles-pp.mm_init 0.33 ± 3% -0.1 0.27 ± 6% perf-profile.children.cycles-pp.update_sd_lb_stats 0.28 ± 5% -0.1 0.22 ± 4% perf-profile.children.cycles-pp.__lookup_slow 0.33 ± 4% -0.1 0.27 ± 4% perf-profile.children.cycles-pp.wake_up_new_task 0.28 ± 6% -0.1 0.21 ± 4% perf-profile.children.cycles-pp.__x64_sys_mprotect 0.35 ± 3% -0.1 0.29 ± 3% perf-profile.children.cycles-pp.__do_sys_newfstatat 0.34 ± 3% -0.1 0.27 perf-profile.children.cycles-pp.select_task_rq_fair 0.38 ± 7% -0.1 0.31 ± 4% perf-profile.children.cycles-pp.alloc_anon_folio 0.41 ± 7% -0.1 0.34 ± 4% perf-profile.children.cycles-pp.folio_alloc_mpol_noprof 0.26 ± 3% -0.1 0.20 ± 4% perf-profile.children.cycles-pp.__do_sys_clone3 0.27 ± 5% -0.1 0.21 ± 4% perf-profile.children.cycles-pp.do_mprotect_pkey 0.40 ± 4% -0.1 0.34 ± 3% perf-profile.children.cycles-pp.vms_clear_ptes 0.30 ± 7% -0.1 0.24 ± 4% perf-profile.children.cycles-pp.finish_task_switch 0.23 ± 10% -0.1 0.17 ± 5% perf-profile.children.cycles-pp.open_last_lookups 0.38 ± 3% -0.1 0.32 ± 5% perf-profile.children.cycles-pp.vms_complete_munmap_vmas 0.16 ± 11% -0.1 0.10 ± 9% perf-profile.children.cycles-pp.tick_nohz_next_event 0.30 ± 4% -0.1 0.24 ± 4% perf-profile.children.cycles-pp.folio_remove_rmap_ptes 0.38 ± 6% -0.1 0.33 ± 5% perf-profile.children.cycles-pp.unlink_anon_vmas 0.23 ± 6% -0.1 0.18 ± 6% perf-profile.children.cycles-pp.dequeue_entities 0.23 ± 5% -0.1 0.18 ± 3% perf-profile.children.cycles-pp.mprotect_fixup 0.29 ± 4% -0.1 0.24 ± 2% perf-profile.children.cycles-pp.filename_lookup 0.21 ± 6% -0.1 0.16 ± 3% perf-profile.children.cycles-pp.__cond_resched 0.26 ± 4% -0.1 0.21 ± 4% perf-profile.children.cycles-pp.__vm_munmap 0.29 ± 3% -0.1 0.24 ± 3% perf-profile.children.cycles-pp.do_open 0.22 ± 6% -0.1 0.16 ± 5% perf-profile.children.cycles-pp.do_task_dead 0.26 ± 5% -0.1 
0.21 ± 2% perf-profile.children.cycles-pp.native_irq_return_iret 0.20 ± 12% -0.1 0.14 ± 8% perf-profile.children.cycles-pp.perf_mux_hrtimer_handler 0.25 ± 7% -0.1 0.20 ± 7% perf-profile.children.cycles-pp.alloc_empty_file 0.24 ± 5% -0.1 0.20 ± 5% perf-profile.children.cycles-pp.enqueue_task_fair 0.21 ± 7% -0.1 0.16 ± 6% perf-profile.children.cycles-pp.update_load_avg 0.28 ± 9% -0.0 0.23 ± 3% perf-profile.children.cycles-pp.mod_objcg_state 0.27 ± 4% -0.0 0.22 ± 6% perf-profile.children.cycles-pp.update_sg_lb_stats 0.21 ± 2% -0.0 0.16 ± 4% perf-profile.children.cycles-pp.alloc_bprm 0.26 ± 2% -0.0 0.21 ± 3% perf-profile.children.cycles-pp.sched_balance_find_dst_group 0.28 ± 5% -0.0 0.24 ± 3% perf-profile.children.cycles-pp.vfs_fstatat 0.27 ± 4% -0.0 0.23 ± 2% perf-profile.children.cycles-pp.path_lookupat 0.28 ± 5% -0.0 0.23 perf-profile.children.cycles-pp.vm_area_dup 0.19 ± 3% -0.0 0.14 ± 3% perf-profile.children.cycles-pp.copy_strings 0.24 ± 5% -0.0 0.19 ± 6% perf-profile.children.cycles-pp.dequeue_task_fair 0.23 ± 3% -0.0 0.19 ± 2% perf-profile.children.cycles-pp.update_sg_wakeup_stats 0.22 ± 5% -0.0 0.17 ± 2% perf-profile.children.cycles-pp.d_alloc 0.25 ± 3% -0.0 0.21 ± 5% perf-profile.children.cycles-pp.do_vmi_munmap 0.15 ± 17% -0.0 0.11 ± 8% perf-profile.children.cycles-pp.lookup_open 0.25 ± 2% -0.0 0.21 ± 6% perf-profile.children.cycles-pp.perf_event_mmap_output 0.20 ± 7% -0.0 0.16 ± 2% perf-profile.children.cycles-pp.set_pte_range 0.31 ± 5% -0.0 0.26 ± 4% perf-profile.children.cycles-pp.down_write 0.20 ± 10% -0.0 0.16 ± 4% perf-profile.children.cycles-pp.mas_walk 0.22 ± 4% -0.0 0.18 ± 5% perf-profile.children.cycles-pp.memset_orig 0.23 ± 5% -0.0 0.19 ± 3% perf-profile.children.cycles-pp.pcpu_alloc_noprof 0.17 ± 4% -0.0 0.13 ± 5% perf-profile.children.cycles-pp.vma_complete 0.16 ± 3% -0.0 0.11 ± 4% perf-profile.children.cycles-pp.get_arg_page 0.20 ± 8% -0.0 0.16 ± 4% perf-profile.children.cycles-pp.__mmdrop 0.22 ± 8% -0.0 0.18 ± 4% 
perf-profile.children.cycles-pp.__mod_memcg_lruvec_state 0.22 ± 6% -0.0 0.18 ± 6% perf-profile.children.cycles-pp.enqueue_task 0.24 ± 5% -0.0 0.20 ± 3% perf-profile.children.cycles-pp.lock_vma_under_rcu 0.26 ± 8% -0.0 0.22 ± 4% perf-profile.children.cycles-pp.___perf_sw_event 0.20 ± 4% -0.0 0.16 ± 2% perf-profile.children.cycles-pp.error_entry 0.18 ± 4% -0.0 0.14 ± 4% perf-profile.children.cycles-pp.mas_wr_node_store 0.17 ± 12% -0.0 0.13 ± 2% perf-profile.children.cycles-pp.pte_alloc_one 0.25 ± 4% -0.0 0.21 ± 2% perf-profile.children.cycles-pp.vfs_statx 0.29 ± 4% -0.0 0.25 ± 3% perf-profile.children.cycles-pp.__vfork 0.20 ± 4% -0.0 0.16 ± 4% perf-profile.children.cycles-pp.lookup_fast 0.17 ± 6% -0.0 0.13 ± 5% perf-profile.children.cycles-pp.vma_modify_flags 0.28 ± 8% -0.0 0.24 ± 5% perf-profile.children.cycles-pp.__perf_sw_event 0.15 ± 7% -0.0 0.12 ± 4% perf-profile.children.cycles-pp.rep_stos_alternative 0.14 ± 7% -0.0 0.10 ± 9% perf-profile.children.cycles-pp.sched_balance_domains 0.14 ± 19% -0.0 0.10 ± 8% perf-profile.children.cycles-pp.__d_lookup_rcu 0.18 ± 4% -0.0 0.14 ± 6% perf-profile.children.cycles-pp.__d_alloc 0.07 ± 15% -0.0 0.04 ± 71% perf-profile.children.cycles-pp.delay_tsc 0.24 ± 5% -0.0 0.20 ± 5% perf-profile.children.cycles-pp.mas_find 0.22 ± 4% -0.0 0.18 ± 2% perf-profile.children.cycles-pp.dup_task_struct 0.18 ± 2% -0.0 0.15 ± 3% perf-profile.children.cycles-pp.__x64_sys_close 0.40 ± 9% -0.0 0.36 ± 3% perf-profile.children.cycles-pp.anon_vma_fork 0.08 ± 8% -0.0 0.05 ± 45% perf-profile.children.cycles-pp.__get_next_timer_interrupt 0.24 ± 2% -0.0 0.20 ± 5% perf-profile.children.cycles-pp.fstatat64 0.17 ± 5% -0.0 0.13 ± 2% perf-profile.children.cycles-pp.shmem_file_write_iter 0.16 ± 6% -0.0 0.13 ± 5% perf-profile.children.cycles-pp.vma_modify 0.13 ± 2% -0.0 0.10 ± 9% perf-profile.children.cycles-pp.vma_interval_tree_insert 0.14 ± 11% -0.0 0.11 ± 7% perf-profile.children.cycles-pp.ktime_get 0.15 ± 7% -0.0 0.11 ± 8% 
perf-profile.children.cycles-pp.rep_movs_alternative 0.17 ± 7% -0.0 0.14 ± 5% perf-profile.children.cycles-pp.__get_unmapped_area 0.22 ± 5% -0.0 0.18 ± 4% perf-profile.children.cycles-pp.__slab_free 0.14 ± 4% -0.0 0.11 ± 12% perf-profile.children.cycles-pp.sched_balance_update_blocked_averages 0.14 ± 5% -0.0 0.10 ± 4% perf-profile.children.cycles-pp.step_into 0.17 ± 7% -0.0 0.14 ± 5% perf-profile.children.cycles-pp.__fput 0.21 ± 6% -0.0 0.18 ± 4% perf-profile.children.cycles-pp.copy_mc_enhanced_fast_string 0.15 ± 6% -0.0 0.12 ± 8% perf-profile.children.cycles-pp.__wait_for_common 0.14 ± 5% -0.0 0.11 ± 6% perf-profile.children.cycles-pp.wait_for_completion_state 0.11 ± 6% -0.0 0.08 ± 6% perf-profile.children.cycles-pp.rcu_sched_clock_irq 0.09 ± 5% -0.0 0.06 ± 6% perf-profile.children.cycles-pp.clockevents_program_event 0.14 ± 5% -0.0 0.11 ± 8% perf-profile.children.cycles-pp.__libc_early_init 0.13 ± 7% -0.0 0.10 ± 5% perf-profile.children.cycles-pp.enqueue_entity 0.14 ± 6% -0.0 0.11 ± 3% perf-profile.children.cycles-pp.generic_perform_write 0.12 ± 6% -0.0 0.08 ± 5% perf-profile.children.cycles-pp.get_user_pages_remote 0.11 ± 8% -0.0 0.08 ± 12% perf-profile.children.cycles-pp.sched_balance_softirq 0.19 ± 5% -0.0 0.16 ± 2% perf-profile.children.cycles-pp.__lruvec_stat_mod_folio 0.18 ± 6% -0.0 0.14 ± 5% perf-profile.children.cycles-pp.task_tick_fair 0.14 ± 5% -0.0 0.12 ± 4% perf-profile.children.cycles-pp.sched_move_task 0.11 ± 6% -0.0 0.08 ± 4% perf-profile.children.cycles-pp.__get_user_pages 0.15 ± 6% -0.0 0.12 ± 5% perf-profile.children.cycles-pp.schedule_timeout 0.17 ± 5% -0.0 0.14 ± 4% perf-profile.children.cycles-pp.__do_wait 0.12 ± 7% -0.0 0.09 ± 4% perf-profile.children.cycles-pp.perf_rotate_context 0.16 ± 9% -0.0 0.13 ± 2% perf-profile.children.cycles-pp.flush_tlb_mm_range 0.21 ± 6% -0.0 0.18 ± 3% perf-profile.children.cycles-pp.vma_merge_new_range 0.14 ± 4% -0.0 0.11 ± 6% perf-profile.children.cycles-pp.kmem_cache_alloc_lru_noprof 0.15 ± 6% -0.0 0.12 ± 7% 
perf-profile.children.cycles-pp.pipe_read 0.06 ± 6% -0.0 0.03 ± 70% perf-profile.children.cycles-pp.strnlen_user 0.14 ± 7% -0.0 0.11 ± 5% perf-profile.children.cycles-pp.vma_prepare 0.15 ± 3% -0.0 0.12 ± 6% perf-profile.children.cycles-pp.__x64_sys_munmap 0.16 ± 4% -0.0 0.13 ± 8% perf-profile.children.cycles-pp.up_write 0.25 ± 4% -0.0 0.22 ± 4% perf-profile.children.cycles-pp.__x64_sys_vfork 0.13 ± 5% -0.0 0.10 ± 5% perf-profile.children.cycles-pp.dequeue_entity 0.14 ± 5% -0.0 0.11 ± 4% perf-profile.children.cycles-pp.setup_arg_pages 0.11 ± 6% -0.0 0.08 ± 7% perf-profile.children.cycles-pp.__mem_cgroup_charge 0.12 ± 5% -0.0 0.10 ± 8% perf-profile.children.cycles-pp.__rseq_handle_notify_resume 0.12 ± 7% -0.0 0.10 ± 7% perf-profile.children.cycles-pp.arch_get_unmapped_area_topdown 0.15 ± 7% -0.0 0.13 ± 5% perf-profile.children.cycles-pp.try_to_wake_up 0.10 ± 4% -0.0 0.08 ± 4% perf-profile.children.cycles-pp.update_rq_clock_task 0.12 ± 7% -0.0 0.10 ± 4% perf-profile.children.cycles-pp.wait_task_zombie 0.15 ± 4% -0.0 0.13 ± 6% perf-profile.children.cycles-pp.__x64_sys_unlink 0.18 ± 4% -0.0 0.16 ± 6% perf-profile.children.cycles-pp.sock_write_iter 0.15 ± 4% -0.0 0.12 ± 3% perf-profile.children.cycles-pp._raw_spin_lock_irqsave 0.08 ± 8% -0.0 0.06 perf-profile.children.cycles-pp.copy_string_kernel 0.10 ± 5% -0.0 0.07 ± 8% perf-profile.children.cycles-pp.do_dentry_open 0.09 -0.0 0.06 ± 11% perf-profile.children.cycles-pp.enqueue_hrtimer 0.07 ± 5% -0.0 0.04 ± 45% perf-profile.children.cycles-pp.read_tsc 0.12 ± 3% -0.0 0.09 ± 5% perf-profile.children.cycles-pp.begin_new_exec 0.11 ± 7% -0.0 0.08 ± 5% perf-profile.children.cycles-pp.folio_add_file_rmap_ptes 0.12 ± 6% -0.0 0.09 perf-profile.children.cycles-pp.relocate_vma_down 0.10 ± 10% -0.0 0.07 ± 5% perf-profile.children.cycles-pp.free_unref_folios 0.10 ± 10% -0.0 0.07 ± 9% perf-profile.children.cycles-pp.irq_enter_rcu 0.11 ± 4% -0.0 0.08 ± 5% perf-profile.children.cycles-pp.vfs_open 0.07 ± 10% -0.0 0.04 ± 45% 
perf-profile.children.cycles-pp.free_unref_page 0.06 ± 7% -0.0 0.04 ± 44% perf-profile.children.cycles-pp.sched_clock_cpu 0.10 ± 9% -0.0 0.08 ± 8% perf-profile.children.cycles-pp.__vmf_anon_prepare 0.15 ± 5% -0.0 0.12 ± 6% perf-profile.children.cycles-pp.do_unlinkat 0.13 ± 6% -0.0 0.10 ± 4% perf-profile.children.cycles-pp.mas_wr_bnode 0.10 ± 6% -0.0 0.08 ± 8% perf-profile.children.cycles-pp.release_task 0.10 ± 7% -0.0 0.08 ± 7% perf-profile.children.cycles-pp.sched_exec 0.10 ± 8% -0.0 0.08 ± 6% perf-profile.children.cycles-pp.inode_permission 0.08 ± 10% -0.0 0.06 ± 6% perf-profile.children.cycles-pp.irqtime_account_irq 0.08 ± 9% -0.0 0.06 ± 9% perf-profile.children.cycles-pp.pgd_alloc 0.08 ± 6% -0.0 0.06 ± 13% perf-profile.children.cycles-pp.timerqueue_add 0.08 ± 4% -0.0 0.06 ± 8% perf-profile.children.cycles-pp.timerqueue_del 0.11 ± 10% -0.0 0.08 ± 5% perf-profile.children.cycles-pp.__vm_area_free 0.18 ± 7% -0.0 0.16 ± 4% perf-profile.children.cycles-pp.__wp_page_copy_user 0.10 ± 8% -0.0 0.07 ± 6% perf-profile.children.cycles-pp.do_open_execat 0.09 ± 8% -0.0 0.06 ± 7% perf-profile.children.cycles-pp.get_free_pages_noprof 0.11 ± 5% -0.0 0.09 ± 7% perf-profile.children.cycles-pp.rseq_ip_fixup 0.09 ± 7% -0.0 0.07 ± 9% perf-profile.children.cycles-pp.vm_unmapped_area 0.08 ± 10% -0.0 0.06 ± 15% perf-profile.children.cycles-pp.cgroup_rstat_updated 0.08 ± 11% -0.0 0.06 ± 11% perf-profile.children.cycles-pp.tick_irq_enter 0.10 ± 11% -0.0 0.08 ± 8% perf-profile.children.cycles-pp.memcpy_orig 0.12 ± 7% -0.0 0.10 perf-profile.children.cycles-pp.alloc_thread_stack_node 0.08 ± 9% -0.0 0.06 ± 8% perf-profile.children.cycles-pp.native_sched_clock 0.07 ± 9% -0.0 0.05 ± 7% perf-profile.children.cycles-pp.__call_rcu_common 0.07 ± 5% -0.0 0.05 ± 7% perf-profile.children.cycles-pp.perf_event_task_output 0.11 ± 3% -0.0 0.09 ± 4% perf-profile.children.cycles-pp.sync 0.12 ± 7% -0.0 0.10 ± 3% perf-profile.children.cycles-pp.arch_do_signal_or_restart 0.15 ± 3% -0.0 0.13 ± 5% 
perf-profile.children.cycles-pp.folio_batch_move_lru 0.10 ± 5% -0.0 0.08 perf-profile.children.cycles-pp.handle_pte_fault 0.14 ± 2% -0.0 0.12 ± 3% perf-profile.children.cycles-pp.vm_area_alloc 0.07 ± 6% -0.0 0.06 ± 9% perf-profile.children.cycles-pp.update_irq_load_avg 0.09 ± 6% -0.0 0.07 ± 9% perf-profile.children.cycles-pp.__put_user_8 0.07 -0.0 0.05 ± 7% perf-profile.children.cycles-pp._raw_spin_lock_irq 0.09 ± 5% -0.0 0.08 ± 6% perf-profile.children.cycles-pp.getname_flags 0.07 ± 8% -0.0 0.05 ± 7% perf-profile.children.cycles-pp.unlink_file_vma_batch_final 0.09 ± 6% -0.0 0.07 ± 9% perf-profile.children.cycles-pp.vma_link_file 0.07 ± 14% -0.0 0.05 ± 8% perf-profile.children.cycles-pp.fput 0.08 ± 10% -0.0 0.06 ± 6% perf-profile.children.cycles-pp.shmem_write_begin 0.06 -0.0 0.04 ± 44% perf-profile.children.cycles-pp.terminate_walk 0.10 ± 4% -0.0 0.08 ± 5% perf-profile.children.cycles-pp.__x64_sys_sync 0.12 ± 4% -0.0 0.10 ± 3% perf-profile.children.cycles-pp.kill_something_info 0.10 ± 4% -0.0 0.08 ± 5% perf-profile.children.cycles-pp.ksys_sync 0.08 ± 13% -0.0 0.06 ± 7% perf-profile.children.cycles-pp.task_work_run 0.13 -0.0 0.11 ± 6% perf-profile.children.cycles-pp.try_to_block_task 0.07 ± 15% -0.0 0.05 ± 7% perf-profile.children.cycles-pp.d_path 0.07 ± 5% -0.0 0.05 ± 7% perf-profile.children.cycles-pp.exec_mmap 0.07 ± 10% -0.0 0.05 ± 7% perf-profile.children.cycles-pp.free_unref_page_commit 0.10 ± 4% -0.0 0.09 ± 5% perf-profile.children.cycles-pp.__percpu_counter_init_many 0.15 ± 3% -0.0 0.13 ± 3% perf-profile.children.cycles-pp.lru_add_drain_cpu 0.09 ± 8% -0.0 0.07 perf-profile.children.cycles-pp.shmem_get_folio_gfp 0.09 ± 4% -0.0 0.07 ± 9% perf-profile.children.cycles-pp.try_to_unlazy 0.08 ± 4% -0.0 0.06 ± 7% perf-profile.children.cycles-pp.vma_interval_tree_remove 0.15 ± 4% -0.0 0.13 ± 3% perf-profile.children.cycles-pp.lru_add_drain 0.08 ± 9% -0.0 0.06 ± 6% perf-profile.children.cycles-pp.__pud_alloc 0.07 ± 10% -0.0 0.05 ± 8% 
perf-profile.children.cycles-pp.create_elf_tables 0.08 ± 6% -0.0 0.06 ± 6% perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack 0.08 ± 8% -0.0 0.06 ± 7% perf-profile.children.cycles-pp.shmem_mknod 0.09 ± 10% -0.0 0.07 ± 6% perf-profile.children.cycles-pp.__page_cache_release 0.08 ± 6% -0.0 0.06 ± 6% perf-profile.children.cycles-pp.__memcg_kmem_charge_page 0.07 ± 11% -0.0 0.05 ± 8% perf-profile.children.cycles-pp.pipe_write 0.20 ± 5% +0.0 0.22 ± 2% perf-profile.children.cycles-pp.__close 0.10 ± 5% +0.0 0.12 ± 6% perf-profile.children.cycles-pp.__local_bh_enable_ip 0.24 ± 5% +0.0 0.28 ± 3% perf-profile.children.cycles-pp.__libc_fork 0.37 ± 4% +0.0 0.40 ± 2% perf-profile.children.cycles-pp.read_write_close 0.09 ± 7% +0.0 0.12 ± 5% perf-profile.children.cycles-pp.do_softirq 0.50 ± 3% +0.0 0.54 perf-profile.children.cycles-pp.creat64 0.52 ± 5% +0.0 0.56 ± 2% perf-profile.children.cycles-pp.page_test 0.81 ± 4% +0.0 0.86 ± 2% perf-profile.children.cycles-pp.brk 2.19 ± 2% +0.1 2.29 perf-profile.children.cycles-pp._Fork 1.10 ± 31% +0.5 1.56 ± 11% perf-profile.children.cycles-pp._raw_spin_lock 0.73 ± 43% +0.5 1.24 ± 13% perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath 0.92 ± 38% +0.5 1.43 ± 12% perf-profile.children.cycles-pp.dcache_readdir 0.92 ± 37% +0.5 1.43 ± 12% perf-profile.children.cycles-pp.__x64_sys_getdents64 0.92 ± 37% +0.5 1.43 ± 12% perf-profile.children.cycles-pp.iterate_dir 0.81 ± 42% +0.5 1.34 ± 13% perf-profile.children.cycles-pp.scan_positives 0.80 ± 38% +0.6 1.44 ± 12% perf-profile.children.cycles-pp.getdents64 66.36 +5.5 71.91 perf-profile.children.cycles-pp.sieve 69.78 +5.8 75.54 perf-profile.children.cycles-pp.main 69.74 +5.8 75.52 perf-profile.children.cycles-pp.multiuser 69.73 +5.8 75.51 perf-profile.children.cycles-pp.runloop 6.29 ± 2% -0.6 5.66 ± 2% perf-profile.self.cycles-pp.intel_idle 0.88 ± 2% -0.2 0.68 ± 3% perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack 0.67 ± 5% -0.1 0.54 ± 2% 
perf-profile.self.cycles-pp.__irqentry_text_end 0.56 ± 3% -0.1 0.45 ± 2% perf-profile.self.cycles-pp.zap_present_ptes 0.41 ± 5% -0.1 0.32 ± 2% perf-profile.self.cycles-pp.syscall_return_via_sysret 0.48 ± 5% -0.1 0.39 ± 3% perf-profile.self.cycles-pp.next_uptodate_folio 0.38 ± 6% -0.1 0.29 ± 3% perf-profile.self.cycles-pp.clear_page_erms 0.40 ± 4% -0.1 0.32 ± 4% perf-profile.self.cycles-pp.entry_SYSCALL_64 0.26 ± 5% -0.1 0.19 ± 3% perf-profile.self.cycles-pp.copy_present_ptes 0.42 ± 9% -0.1 0.35 perf-profile.self.cycles-pp._raw_spin_lock 0.31 ± 4% -0.1 0.25 ± 4% perf-profile.self.cycles-pp.__memcg_slab_post_alloc_hook 0.38 ± 6% -0.1 0.32 ± 5% perf-profile.self.cycles-pp.filemap_map_pages 0.26 ± 5% -0.1 0.21 ± 2% perf-profile.self.cycles-pp.native_irq_return_iret 0.20 ± 9% -0.0 0.15 ± 5% perf-profile.self.cycles-pp.mas_walk 0.24 ± 5% -0.0 0.20 ± 5% perf-profile.self.cycles-pp.folio_remove_rmap_ptes 0.28 ± 4% -0.0 0.24 ± 3% perf-profile.self.cycles-pp.kmem_cache_alloc_noprof 0.24 ± 9% -0.0 0.20 ± 3% perf-profile.self.cycles-pp.___perf_sw_event 0.20 ± 4% -0.0 0.16 ± 2% perf-profile.self.cycles-pp.error_entry 0.12 ± 6% -0.0 0.09 ± 5% perf-profile.self.cycles-pp.menu_select 0.14 ± 19% -0.0 0.10 ± 6% perf-profile.self.cycles-pp.__d_lookup_rcu 0.23 ± 5% -0.0 0.19 ± 6% perf-profile.self.cycles-pp.update_sg_lb_stats 0.20 ± 4% -0.0 0.17 ± 4% perf-profile.self.cycles-pp.update_sg_wakeup_stats 0.21 ± 6% -0.0 0.17 ± 6% perf-profile.self.cycles-pp.__memcg_slab_free_hook 0.21 ± 8% -0.0 0.18 ± 5% perf-profile.self.cycles-pp.kmem_cache_free 0.13 ± 4% -0.0 0.10 ± 8% perf-profile.self.cycles-pp.vma_interval_tree_insert 0.19 ± 3% -0.0 0.16 ± 6% perf-profile.self.cycles-pp.folios_put_refs 0.20 ± 7% -0.0 0.16 ± 7% perf-profile.self.cycles-pp.__handle_mm_fault 0.21 ± 6% -0.0 0.18 ± 5% perf-profile.self.cycles-pp.__slab_free 0.20 ± 6% -0.0 0.17 ± 4% perf-profile.self.cycles-pp.copy_mc_enhanced_fast_string 0.16 ± 7% -0.0 0.13 ± 5% perf-profile.self.cycles-pp.down_write 0.16 ± 9% -0.0 0.13 ± 
3% perf-profile.self.cycles-pp.__mod_memcg_lruvec_state 0.15 ± 7% -0.0 0.12 ± 6% perf-profile.self.cycles-pp.up_write 0.10 ± 9% -0.0 0.07 ± 5% perf-profile.self.cycles-pp.cpuidle_enter_state 0.21 ± 4% -0.0 0.18 ± 5% perf-profile.self.cycles-pp.memset_orig 0.15 ± 7% -0.0 0.12 ± 7% perf-profile.self.cycles-pp.zap_pte_range 0.16 ± 3% -0.0 0.13 ± 5% perf-profile.self.cycles-pp.free_pages_and_swap_cache 0.12 ± 9% -0.0 0.09 ± 10% perf-profile.self.cycles-pp.rep_movs_alternative 0.10 ± 6% -0.0 0.07 ± 9% perf-profile.self.cycles-pp.update_load_avg 0.10 ± 4% -0.0 0.08 ± 6% perf-profile.self.cycles-pp.folio_add_file_rmap_ptes 0.08 ± 9% -0.0 0.06 ± 13% perf-profile.self.cycles-pp.cgroup_rstat_updated 0.08 ± 4% -0.0 0.06 ± 11% perf-profile.self.cycles-pp.mas_wr_node_store 0.09 ± 9% -0.0 0.07 ± 5% perf-profile.self.cycles-pp.__cond_resched 0.08 ± 7% -0.0 0.06 ± 6% perf-profile.self.cycles-pp.set_pte_range 0.08 ± 8% -0.0 0.06 ± 6% perf-profile.self.cycles-pp.vma_interval_tree_remove 0.11 ± 7% -0.0 0.09 ± 5% perf-profile.self.cycles-pp._raw_spin_lock_irqsave 0.07 ± 6% -0.0 0.06 ± 6% perf-profile.self.cycles-pp.percpu_counter_add_batch 0.07 ± 7% -0.0 0.05 ± 7% perf-profile.self.cycles-pp._raw_spin_lock_irq 0.07 ± 7% -0.0 0.05 ± 8% perf-profile.self.cycles-pp.update_irq_load_avg 0.06 ± 7% -0.0 0.05 ± 8% perf-profile.self.cycles-pp.__task_pid_nr_ns 0.72 ± 43% +0.5 1.23 ± 13% perf-profile.self.cycles-pp.native_queued_spin_lock_slowpath 65.88 +5.5 71.36 perf-profile.self.cycles-pp.sieve 0.06 ± 62% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.__handle_mm_fault 0.07 ± 25% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.copy_p4d_range 0.12 ± 31% -95.3% 0.01 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.folio_alloc_mpol_noprof.shmem_alloc_folio 0.05 ± 72% -100.0% 0.00 
perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.get_free_pages_noprof.pgd_alloc 0.07 ± 14% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.pte_alloc_one.__pte_alloc 0.10 ± 81% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page.__handle_mm_fault 0.06 ± 81% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_fault.__handle_mm_fault 0.10 ± 83% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__dentry_kill.dput.step_into.link_path_walk 0.03 ± 63% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__get_user_pages.get_user_pages_remote.get_arg_page.copy_strings 0.12 ± 44% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_cache_node_noprof.__get_vm_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node 0.08 ± 27% -97.4% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_cache_noprof.perf_event_mmap_event.perf_event_mmap.mmap_region 0.02 ± 35% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_noprof.load_elf_phdrs.load_elf_binary.search_binary_handler 0.13 ± 60% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_prepare_creds.prepare_creds 0.08 ± 28% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_task_alloc.copy_process 0.08 ± 75% -86.8% 0.01 ±187% perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_noprof.security_inode_init_security.shmem_mknod.lookup_open 0.05 ± 26% -96.9% 0.00 ±142% perf-sched.sch_delay.avg.ms.__cond_resched.__put_anon_vma.unlink_anon_vmas.free_pgtables.exit_mmap 0.17 ± 57% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__release_sock.release_sock.tcp_sendmsg.sock_write_iter 0.07 ± 10% -99.5% 0.00 ±223% 
perf-sched.sch_delay.avg.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.exit_mmap.mmput 0.05 ± 58% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.relocate_vma_down.setup_arg_pages 0.08 ± 50% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.__vmalloc_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node.dup_task_struct 0.06 ± 8% +2931.0% 1.82 ± 4% perf-sched.sch_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 0.10 ± 28% -86.9% 0.01 ±144% perf-sched.sch_delay.avg.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__x64_sys_vfork 0.15 ± 54% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.aa_sk_perm.security_socket_recvmsg.sock_recvmsg.sock_read_iter 0.14 ± 33% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.aa_sk_perm.security_socket_sendmsg.sock_write_iter.vfs_write 0.06 ± 46% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.apparmor_file_alloc_security.security_file_alloc.init_file.alloc_empty_file 0.10 ± 57% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.cgroup_css_set_fork.cgroup_can_fork.copy_process.kernel_clone 0.12 ± 61% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.change_pmd_range.isra.0.change_pud_range 0.06 ± 41% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.change_pud_range.isra.0.change_protection_range 0.10 ± 34% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.copy_page_range.dup_mmap.dup_mm.constprop 0.07 ± 64% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.copy_strings.isra.0.do_execveat_common 0.15 ± 85% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.count.constprop.0.isra 0.06 ± 53% -97.1% 0.00 ±145% perf-sched.sch_delay.avg.ms.__cond_resched.down_read.acct_collect.do_exit.do_group_exit 0.10 ± 23% -97.3% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.down_read.walk_component.link_path_walk.part 0.11 
± 50% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_read.walk_component.path_lookupat.filename_lookup 0.13 ±132% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page 0.04 ±122% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.anon_vma_clone.__split_vma.vma_modify 0.09 ± 18% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.anon_vma_clone.anon_vma_fork.dup_mmap 0.08 ± 24% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.anon_vma_fork.dup_mmap.dup_mm 0.16 ± 49% -89.1% 0.02 ±184% perf-sched.sch_delay.avg.ms.__cond_resched.down_write.do_truncate.do_open.path_openat 0.08 ± 16% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.free_pgtables.exit_mmap.mmput 0.08 ± 41% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.mmap_region.do_mmap.vm_mmap_pgoff 0.02 ± 46% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.shmem_file_write_iter.vfs_write.ksys_write 0.10 ± 62% -99.0% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.down_write.unlink_anon_vmas.free_pgtables.exit_mmap 0.07 ± 36% -96.1% 0.00 ±155% perf-sched.sch_delay.avg.ms.__cond_resched.down_write.vma_link_file.mmap_region.do_mmap 0.11 ±150% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.vma_prepare.__split_vma.vma_modify 0.11 ± 45% -76.5% 0.02 ±173% perf-sched.sch_delay.avg.ms.__cond_resched.down_write.vma_prepare.commit_merge.vma_expand 0.14 ± 67% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write.vms_gather_munmap_vmas.do_vmi_align_munmap.do_vmi_munmap 0.05 ±120% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.create_elf_tables.load_elf_binary.search_binary_handler 0.02 ± 16% -68.9% 0.01 ±112% perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.exec_mmap.begin_new_exec.load_elf_binary 0.05 ± 51% -97.5% 0.00 ±223% 
perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.setup_arg_pages.load_elf_binary.search_binary_handler 0.03 ± 89% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.vm_brk_flags.elf_load.load_elf_binary 0.06 ± 58% -98.7% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_binary 0.10 ± 33% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.ksys_mmap_pgoff.do_syscall_64 0.14 ± 70% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.dput.do_linkat.__x64_sys_link.do_syscall_64 0.09 ±110% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.dput.path_put.exit_fs.do_exit 0.14 ± 33% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.dput.path_put.vfs_statx.vfs_fstatat 0.08 ± 17% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.dput.step_into.link_path_walk.part 0.13 ± 65% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat 0.11 ± 80% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.dput.step_into.path_lookupat.filename_lookup 0.14 ± 73% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.dput.terminate_walk.path_lookupat.filename_lookup 0.04 ±130% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.exit_signals.do_exit.do_group_exit.__x64_sys_exit_group 0.05 ± 54% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.filemap_read.__kernel_read.search_binary_handler.exec_binprm 0.14 ± 20% -87.1% 0.02 ±150% perf-sched.sch_delay.avg.ms.__cond_resched.generic_perform_write.shmem_file_write_iter.vfs_write.ksys_write 0.09 ± 41% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_lru_noprof.__d_alloc.d_alloc.d_alloc_parallel 0.07 ± 39% -93.0% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_node_noprof.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb 0.11 ± 72% -100.0% 0.00 
perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_node_noprof.dup_task_struct.copy_process.kernel_clone 0.10 ± 35% -93.8% 0.01 ±182% perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_pid.copy_process.kernel_clone 0.10 ± 25% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.anon_vma_fork.dup_mmap.dup_mm 0.09 ± 37% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.copy_fs_struct.copy_process.kernel_clone 0.07 ± 92% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.copy_signal.copy_process.kernel_clone 0.05 ± 54% -96.3% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.dup_mm.constprop.0 0.14 ± 88% -97.8% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.__split_vma 0.08 ± 25% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.mmap_region 0.08 ±121% -99.2% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.vma_link 0.09 ± 18% -93.4% 0.01 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.prepare_creds.copy_creds.copy_process 0.08 ± 49% -90.9% 0.01 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.security_inode_alloc.inode_init_always.alloc_inode 0.03 ±106% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.__bprm_mm_init.alloc_bprm 0.07 ± 85% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc._install_special_mapping.map_vdso 0.10 ± 72% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.do_brk_flags.__do_sys_brk 0.09 ± 77% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vma_modify 0.08 ± 41% -91.9% 0.01 ±148% 
perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vms_gather_munmap_vmas 0.09 ± 11% -92.8% 0.01 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.dup_mmap.dup_mm 0.11 ±112% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.mmput.exec_mmap.begin_new_exec.load_elf_binary 0.09 ± 96% -97.5% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.mnt_want_write.do_unlinkat.__x64_sys_unlink.do_syscall_64 0.08 ± 74% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.mnt_want_write.open_last_lookups.path_openat.do_filp_open 0.02 ± 56% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock.futex_exec_release.exec_mm_release.exec_mmap 0.06 ± 49% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock.futex_exit_release.exit_mm_release.exit_mm 0.11 ± 68% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock.pipe_read.vfs_read.ksys_read 0.06 ± 36% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.__percpu_counter_init_many.mm_init 0.02 ± 63% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.alloc_bprm 0.07 ± 24% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.dup_mm 0.09 ± 39% -99.5% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.put_files_struct.do_exit.do_group_exit.__x64_sys_exit_group 0.06 ± 24% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.remove_vma.exit_mmap.mmput.exit_mm 0.24 ±211% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.do_vmi_align_munmap.do_vmi_munmap 0.08 ± 68% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.mmap_region.do_mmap 0.09 ± 90% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.shmem_get_folio_gfp.shmem_file_read_iter.vfs_read.ksys_read 0.08 ±118% -100.0% 0.00 
perf-sched.sch_delay.avg.ms.__cond_resched.shmem_get_folio_gfp.shmem_write_begin.generic_perform_write.shmem_file_write_iter 0.14 ± 72% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.shmem_undo_range.shmem_evict_inode.evict.__dentry_kill 0.06 ± 6% +217.5% 0.19 ± 16% perf-sched.sch_delay.avg.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part 0.11 ± 30% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.super_lock.iterate_supers.ksys_sync.__x64_sys_sync 0.15 ±162% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.switch_task_namespaces.do_exit.do_group_exit.__x64_sys_exit_group 0.06 ± 71% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.task_work_run.do_exit.do_group_exit.__x64_sys_exit_group 0.18 ± 88% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.task_work_run.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.06 ± 30% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.unmap_vmas.exit_mmap.mmput.exit_mm 0.07 ± 60% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_clean_up_area.mmap_region 0.13 ± 73% -99.3% 0.00 ±223% perf-sched.sch_delay.avg.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_complete_munmap_vmas.mmap_region 0.09 ± 27% -100.0% 0.00 perf-sched.sch_delay.avg.ms.__cond_resched.uprobe_start_dup_mmap.dup_mmap.dup_mm.constprop 0.12 +18.7% 0.15 perf-sched.sch_delay.avg.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 0.09 ± 16% -44.7% 0.05 ± 57% perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown] 0.12 ± 4% -23.5% 0.10 ± 15% perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown] 0.09 ± 24% -97.0% 0.00 ±223% perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown] 0.11 ± 14% -93.8% 0.01 ±182% perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown].[unknown] 0.09 ± 26% -92.4% 
0.01 ±223% perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown] 0.14 ± 2% -42.0% 0.08 ± 37% perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown] 0.06 ± 2% +19.8% 0.08 ± 3% perf-sched.sch_delay.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64 0.06 ± 12% +188.2% 0.17 ± 40% perf-sched.sch_delay.avg.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait 0.01 ± 72% -100.0% 0.00 perf-sched.sch_delay.avg.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.unlink_file_vma_batch_final 0.08 ± 2% +74.7% 0.13 ± 5% perf-sched.sch_delay.avg.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 0.02 ± 22% +9365.4% 2.15 ± 43% perf-sched.sch_delay.avg.ms.schedule_timeout.io_schedule_timeout.__wait_for_common.blk_execute_rq 0.07 ± 28% +480.6% 0.39 ± 57% perf-sched.sch_delay.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork 0.04 ± 2% +417.9% 0.22 ± 5% perf-sched.sch_delay.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 0.02 ± 6% +4557.3% 0.91 perf-sched.sch_delay.avg.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 0.04 ± 36% -97.3% 0.00 ±223% perf-sched.sch_delay.avg.ms.syscall_exit_to_user_mode.ret_from_fork_asm.[unknown] 0.10 ± 35% -100.0% 0.00 perf-sched.sch_delay.avg.ms.syscall_exit_to_user_mode.ret_from_fork_asm.[unknown].[unknown] 0.05 ± 4% +2256.8% 1.22 ± 3% perf-sched.sch_delay.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 0.27 ± 80% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.__handle_mm_fault 0.26 ± 19% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.copy_p4d_range 0.29 ± 38% -98.0% 0.01 ±223% perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.folio_alloc_mpol_noprof.shmem_alloc_folio 0.10 ± 80% -100.0% 0.00 
perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.get_free_pages_noprof.pgd_alloc 0.41 ± 34% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.pte_alloc_one.__pte_alloc 0.47 ±105% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page.__handle_mm_fault 0.17 ± 90% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_fault.__handle_mm_fault 0.31 ± 74% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__dentry_kill.dput.step_into.link_path_walk 0.49 ± 69% -94.2% 0.03 ±223% perf-sched.sch_delay.max.ms.__cond_resched.__do_fault.do_read_fault.do_fault.__handle_mm_fault 0.07 ± 73% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__get_user_pages.get_user_pages_remote.get_arg_page.copy_strings 0.22 ± 23% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_cache_node_noprof.__get_vm_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node 0.57 ± 42% -99.6% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_cache_noprof.perf_event_mmap_event.perf_event_mmap.mmap_region 0.07 ± 31% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_noprof.load_elf_phdrs.load_elf_binary.search_binary_handler 0.49 ± 60% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_prepare_creds.prepare_creds 0.21 ± 36% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_task_alloc.copy_process 0.13 ± 82% -91.7% 0.01 ±187% perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_noprof.security_inode_init_security.shmem_mknod.lookup_open 0.66 ± 41% -99.8% 0.00 ±142% perf-sched.sch_delay.max.ms.__cond_resched.__put_anon_vma.unlink_anon_vmas.free_pgtables.exit_mmap 0.23 ± 37% -100.0% 0.00 
perf-sched.sch_delay.max.ms.__cond_resched.__release_sock.release_sock.tcp_sendmsg.sock_write_iter 1.38 ± 46% -100.0% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.exit_mmap.mmput 0.19 ± 84% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.relocate_vma_down.setup_arg_pages 0.25 ± 60% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.__vmalloc_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node.dup_task_struct 2.77 ± 42% +823.0% 25.54 ± 6% perf-sched.sch_delay.max.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 1.08 ± 29% -98.8% 0.01 ±144% perf-sched.sch_delay.max.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__x64_sys_vfork 0.37 ± 65% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.aa_sk_perm.security_socket_recvmsg.sock_recvmsg.sock_read_iter 0.28 ± 51% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.aa_sk_perm.security_socket_sendmsg.sock_write_iter.vfs_write 0.19 ± 60% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.apparmor_file_alloc_security.security_file_alloc.init_file.alloc_empty_file 0.59 ±149% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.cgroup_css_set_fork.cgroup_can_fork.copy_process.kernel_clone 0.18 ± 70% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.change_pmd_range.isra.0.change_pud_range 0.41 ± 43% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.change_pud_range.isra.0.change_protection_range 0.22 ± 35% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.copy_page_range.dup_mmap.dup_mm.constprop 0.82 ± 30% -95.8% 0.03 ±207% perf-sched.sch_delay.max.ms.__cond_resched.copy_pte_range.copy_p4d_range.copy_page_range.dup_mmap 0.50 ± 54% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.copy_strings.isra.0.do_execveat_common 0.34 ± 93% -100.0% 0.00 
perf-sched.sch_delay.max.ms.__cond_resched.count.constprop.0.isra 0.56 ± 64% -99.7% 0.00 ±145% perf-sched.sch_delay.max.ms.__cond_resched.down_read.acct_collect.do_exit.do_group_exit 0.58 ± 40% -99.5% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.down_read.walk_component.link_path_walk.part 0.35 ± 85% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_read.walk_component.path_lookupat.filename_lookup 0.19 ± 87% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page 0.11 ±145% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.anon_vma_clone.__split_vma.vma_modify 0.38 ± 21% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.anon_vma_clone.anon_vma_fork.dup_mmap 0.22 ± 21% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.anon_vma_fork.dup_mmap.dup_mm 0.87 ±110% -97.0% 0.03 ±197% perf-sched.sch_delay.max.ms.__cond_resched.down_write.do_truncate.do_open.path_openat 0.48 ± 42% -93.4% 0.03 ±223% perf-sched.sch_delay.max.ms.__cond_resched.down_write.dup_mmap.dup_mm.constprop 1.03 ± 50% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.free_pgtables.exit_mmap.mmput 0.27 ± 45% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.mmap_region.do_mmap.vm_mmap_pgoff 0.02 ± 43% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.shmem_file_write_iter.vfs_write.ksys_write 0.50 ± 48% -99.8% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.down_write.unlink_anon_vmas.free_pgtables.exit_mmap 0.37 ± 46% -99.3% 0.00 ±155% perf-sched.sch_delay.max.ms.__cond_resched.down_write.vma_link_file.mmap_region.do_mmap 0.27 ±147% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.vma_prepare.__split_vma.vma_modify 0.24 ± 83% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write.vms_gather_munmap_vmas.do_vmi_align_munmap.do_vmi_munmap 0.17 ±154% -100.0% 0.00 
perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.create_elf_tables.load_elf_binary.search_binary_handler 1.06 ± 27% -99.2% 0.01 ±125% perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.exec_mmap.begin_new_exec.load_elf_binary 0.17 ± 52% -99.2% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.setup_arg_pages.load_elf_binary.search_binary_handler 0.07 ±120% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.vm_brk_flags.elf_load.load_elf_binary 0.30 ± 71% -99.7% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_binary 0.41 ± 44% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.ksys_mmap_pgoff.do_syscall_64 0.30 ± 55% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.dput.do_linkat.__x64_sys_link.do_syscall_64 0.26 ± 79% -88.4% 0.03 ±223% perf-sched.sch_delay.max.ms.__cond_resched.dput.open_last_lookups.path_openat.do_filp_open 0.24 ±126% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.dput.path_put.exit_fs.do_exit 0.44 ± 41% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.dput.path_put.vfs_statx.vfs_fstatat 1.12 ± 34% -84.3% 0.18 ± 53% perf-sched.sch_delay.max.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir 0.22 ± 45% -82.5% 0.04 ±210% perf-sched.sch_delay.max.ms.__cond_resched.dput.shmem_unlink.vfs_unlink.do_unlinkat 0.61 ± 43% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.dput.step_into.link_path_walk.part 0.23 ± 46% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat 0.38 ± 97% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.dput.step_into.path_lookupat.filename_lookup 0.33 ± 88% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.dput.terminate_walk.path_lookupat.filename_lookup 0.91 ±116% -88.9% 0.10 ± 95% 
perf-sched.sch_delay.max.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open 1.21 ± 37% -94.3% 0.07 ±218% perf-sched.sch_delay.max.ms.__cond_resched.exit_mmap.mmput.exit_mm.do_exit 0.09 ±151% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.exit_signals.do_exit.do_group_exit.__x64_sys_exit_group 0.28 ± 75% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.filemap_read.__kernel_read.search_binary_handler.exec_binprm 0.89 ± 14% -93.2% 0.06 ±146% perf-sched.sch_delay.max.ms.__cond_resched.generic_perform_write.shmem_file_write_iter.vfs_write.ksys_write 0.30 ± 35% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_lru_noprof.__d_alloc.d_alloc.d_alloc_parallel 0.13 ± 63% -96.3% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_node_noprof.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb 0.19 ± 62% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_node_noprof.dup_task_struct.copy_process.kernel_clone 0.31 ± 54% -98.1% 0.01 ±182% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_pid.copy_process.kernel_clone 0.98 ±112% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.anon_vma_fork.dup_mmap.dup_mm 0.40 ± 59% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.copy_fs_struct.copy_process.kernel_clone 0.08 ± 77% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.copy_signal.copy_process.kernel_clone 0.09 ± 75% -98.2% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.dup_mm.constprop.0 0.51 ± 89% -99.4% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.__split_vma 0.51 ± 57% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.mmap_region 0.13 ±102% -99.5% 0.00 ±223% 
perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.vma_link 0.58 ± 34% -98.9% 0.01 ±223% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.prepare_creds.copy_creds.copy_process 0.22 ± 61% -96.7% 0.01 ±223% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.security_inode_alloc.inode_init_always.alloc_inode 0.08 ± 77% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.__bprm_mm_init.alloc_bprm 0.27 ±117% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc._install_special_mapping.map_vdso 0.17 ± 80% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.do_brk_flags.__do_sys_brk 0.93 ± 31% -95.1% 0.05 ±219% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.mmap_region.do_mmap 0.35 ± 62% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vma_modify 0.29 ± 33% -97.0% 0.01 ±159% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vms_gather_munmap_vmas 0.64 ± 43% -99.0% 0.01 ±223% perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.dup_mmap.dup_mm 0.18 ±105% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.mmput.exec_mmap.begin_new_exec.load_elf_binary 0.51 ± 58% -78.7% 0.11 ± 76% perf-sched.sch_delay.max.ms.__cond_resched.mnt_want_write.do_open.path_openat.do_filp_open 0.22 ± 97% -99.0% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.mnt_want_write.do_unlinkat.__x64_sys_unlink.do_syscall_64 0.12 ± 74% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.mnt_want_write.open_last_lookups.path_openat.do_filp_open 0.03 ± 84% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock.futex_exec_release.exec_mm_release.exec_mmap 0.25 ± 64% -100.0% 0.00 
perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock.futex_exit_release.exit_mm_release.exit_mm 0.17 ± 73% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock.pipe_read.vfs_read.ksys_read 0.65 ± 97% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.__percpu_counter_init_many.mm_init 0.05 ± 98% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.alloc_bprm 0.19 ± 39% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.dup_mm 0.40 ± 43% -99.9% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.put_files_struct.do_exit.do_group_exit.__x64_sys_exit_group 0.28 ± 19% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.remove_vma.exit_mmap.mmput.exit_mm 0.24 ±207% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.do_vmi_align_munmap.do_vmi_munmap 0.25 ± 72% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.mmap_region.do_mmap 0.18 ± 86% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.shmem_get_folio_gfp.shmem_file_read_iter.vfs_read.ksys_read 0.24 ±144% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.shmem_get_folio_gfp.shmem_write_begin.generic_perform_write.shmem_file_write_iter 0.26 ± 38% -82.5% 0.05 ±136% perf-sched.sch_delay.max.ms.__cond_resched.shmem_inode_acct_blocks.shmem_alloc_and_add_folio.shmem_get_folio_gfp.shmem_write_begin 0.48 ± 77% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.shmem_undo_range.shmem_evict_inode.evict.__dentry_kill 3.05 ± 27% +1002.5% 33.63 ± 17% perf-sched.sch_delay.max.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part 0.34 ± 52% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.super_lock.iterate_supers.ksys_sync.__x64_sys_sync 0.31 ±167% -100.0% 0.00 
perf-sched.sch_delay.max.ms.__cond_resched.switch_task_namespaces.do_exit.do_group_exit.__x64_sys_exit_group 0.10 ± 61% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.task_work_run.do_exit.do_group_exit.__x64_sys_exit_group 0.26 ±123% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.task_work_run.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.34 ± 49% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.unmap_vmas.exit_mmap.mmput.exit_mm 0.14 ± 72% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_clean_up_area.mmap_region 0.36 ± 81% -99.7% 0.00 ±223% perf-sched.sch_delay.max.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_complete_munmap_vmas.mmap_region 0.40 ± 37% -100.0% 0.00 perf-sched.sch_delay.max.ms.__cond_resched.uprobe_start_dup_mmap.dup_mmap.dup_mm.constprop 0.19 ± 43% -86.5% 0.02 ±156% perf-sched.sch_delay.max.ms.__cond_resched.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.58 ± 34% -87.9% 0.07 ±213% perf-sched.sch_delay.max.ms.__cond_resched.wp_page_copy.__handle_mm_fault.handle_mm_fault.do_user_addr_fault 1.40 ± 31% -88.9% 0.15 ±133% perf-sched.sch_delay.max.ms.__cond_resched.zap_pmd_range.isra.0.unmap_page_range 3.97 ± 32% +285.0% 15.30 ± 31% perf-sched.sch_delay.max.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 1.01 ± 31% +508.2% 6.14 ± 99% perf-sched.sch_delay.max.ms.io_schedule.folio_wait_bit_common.filemap_fault.__do_fault 1.06 ± 11% -88.7% 0.12 ± 71% perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown] 0.43 ± 44% -99.4% 0.00 ±223% perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown] 1.15 ± 14% -99.3% 0.01 ±171% perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown].[unknown] 0.84 ± 51% -99.1% 0.01 ±223% perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown] 1.59 ± 25% -63.9% 
0.57 ± 60% perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown] 3.05 ± 20% +565.2% 20.27 ± 69% perf-sched.sch_delay.max.ms.pipe_read.vfs_read.ksys_read.do_syscall_64 0.20 ± 55% +2047.2% 4.20 ± 45% perf-sched.sch_delay.max.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait 0.19 ± 49% -86.6% 0.03 ±151% perf-sched.sch_delay.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.dup_mmap 0.04 ±108% -100.0% 0.00 perf-sched.sch_delay.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.unlink_file_vma_batch_final 3.80 ± 32% +567.5% 25.38 ± 20% perf-sched.sch_delay.max.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 0.03 ± 24% +15155.4% 4.96 ± 38% perf-sched.sch_delay.max.ms.schedule_timeout.io_schedule_timeout.__wait_for_common.blk_execute_rq 0.21 ± 92% +1638.3% 3.70 ± 62% perf-sched.sch_delay.max.ms.schedule_timeout.kcompactd.kthread.ret_from_fork 0.78 ± 36% +707.2% 6.33 ± 14% perf-sched.sch_delay.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread 2.43 ± 29% +226.7% 7.94 ± 11% perf-sched.sch_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 2.52 ± 29% -64.9% 0.89 ± 83% perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 0.39 ± 65% -99.7% 0.00 ±223% perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.ret_from_fork_asm.[unknown] 0.54 ± 43% -100.0% 0.00 perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.ret_from_fork_asm.[unknown].[unknown] 4.74 ± 91% +283.4% 18.17 ± 25% perf-sched.sch_delay.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 0.06 +218.9% 0.20 perf-sched.total_sch_delay.average.ms 6.21 ± 60% +512.8% 38.05 ± 19% perf-sched.total_sch_delay.max.ms 11.78 ± 6% +19.6% 14.08 perf-sched.total_wait_and_delay.average.ms 78829 ± 6% -35.4% 50952 perf-sched.total_wait_and_delay.count.ms 11.72 ± 6% +18.5% 13.89 
perf-sched.total_wait_time.average.ms 13.85 ± 3% +48.1% 20.52 ± 11% perf-sched.wait_and_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 45.00 ± 63% -100.0% 0.00 perf-sched.wait_and_delay.avg.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir 0.06 ± 5% +189.0% 0.19 ± 16% perf-sched.wait_and_delay.avg.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part 105.66 ± 3% +16.2% 122.81 ± 3% perf-sched.wait_and_delay.avg.ms.devkmsg_read.vfs_read.ksys_read.do_syscall_64 3.49 ± 47% -100.0% 0.00 perf-sched.wait_and_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown] 10.58 ± 7% +29.6% 13.71 perf-sched.wait_and_delay.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64 424.15 ± 11% +49.5% 633.95 ± 6% perf-sched.wait_and_delay.avg.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll 82.50 ± 68% -75.1% 20.55 ± 90% perf-sched.wait_and_delay.avg.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.do_unlinkat 70.96 ± 5% +13.0% 80.21 perf-sched.wait_and_delay.avg.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 75.58 ± 8% +29.3% 97.75 ± 4% perf-sched.wait_and_delay.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 70.17 ± 16% -100.0% 0.00 perf-sched.wait_and_delay.count.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir 18.67 ± 18% -90.2% 1.83 ± 85% perf-sched.wait_and_delay.count.__cond_resched.generic_perform_write.shmem_file_write_iter.vfs_write.ksys_write 753.00 ± 5% -99.9% 0.50 ±152% perf-sched.wait_and_delay.count.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 2179 ± 6% -36.1% 1393 perf-sched.wait_and_delay.count.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part 21418 ± 6% -29.7% 15061 perf-sched.wait_and_delay.count.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call 21231 ± 7% -30.8% 14692 
perf-sched.wait_and_delay.count.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64 86.83 ± 10% -99.2% 0.67 ±223% perf-sched.wait_and_delay.count.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown] 1590 ± 5% -65.0% 556.33 ± 3% perf-sched.wait_and_delay.count.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown] 792.33 ± 8% -100.0% 0.00 perf-sched.wait_and_delay.count.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown] 7701 ± 6% -19.5% 6199 perf-sched.wait_and_delay.count.pipe_read.vfs_read.ksys_read.do_syscall_64 77.83 ± 7% -28.5% 55.67 ± 6% perf-sched.wait_and_delay.count.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll 129.67 ± 9% -28.7% 92.50 ± 18% perf-sched.wait_and_delay.count.schedule_preempt_disabled.rwsem_down_read_slowpath.down_read.walk_component 6624 ± 5% -18.5% 5402 perf-sched.wait_and_delay.count.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone 5597 ± 6% -34.1% 3690 perf-sched.wait_and_delay.count.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 2031 ± 11% -99.5% 10.00 ±101% perf-sched.wait_and_delay.count.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown] 2934 ± 7% -36.3% 1867 ± 2% perf-sched.wait_and_delay.count.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 1005 -100.0% 0.00 perf-sched.wait_and_delay.max.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir 2375 ± 7% -98.2% 43.16 ±148% perf-sched.wait_and_delay.max.ms.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 4.43 ± 50% +658.9% 33.63 ± 17% perf-sched.wait_and_delay.max.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part 1796 ± 22% -35.8% 1152 ± 25% perf-sched.wait_and_delay.max.ms.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call 2200 ± 54% -92.4% 167.58 ±223% perf-sched.wait_and_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown] 839.75 ± 44% -100.0% 0.00 
perf-sched.wait_and_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown] 2744 ± 3% -36.5% 1741 ± 15% perf-sched.wait_and_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 2864 ± 23% -30.6% 1988 ± 11% perf-sched.wait_and_delay.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 0.01 ± 60% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.__handle_mm_fault 0.33 ± 18% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.copy_p4d_range 0.17 ± 50% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.get_free_pages_noprof.pgd_alloc 8.62 ±216% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.pte_alloc_one.__pte_alloc 0.10 ± 75% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page.__handle_mm_fault 0.09 ± 86% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__dentry_kill.dput.step_into.link_path_walk 0.56 ± 76% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_cache_node_noprof.__get_vm_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node 0.05 ± 33% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_cache_noprof.perf_event_mmap_event.perf_event_mmap.mmap_region 0.01 ± 51% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_noprof.load_elf_phdrs.load_elf_binary.search_binary_handler 0.33 ± 60% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_prepare_creds.prepare_creds 0.56 ± 49% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_task_alloc.copy_process 0.02 ± 71% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__put_anon_vma.unlink_anon_vmas.free_pgtables.exit_mmap 56.87 
±218% -99.7% 0.15 ±213% perf-sched.wait_time.avg.ms.__cond_resched.__release_sock.release_sock.__inet_stream_connect.inet_stream_connect 42.63 ±219% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__release_sock.release_sock.tcp_sendmsg.sock_write_iter 0.19 ± 10% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.exit_mmap.mmput 0.02 ± 57% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.relocate_vma_down.setup_arg_pages 0.43 ± 44% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__vmalloc_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node.dup_task_struct 13.79 ± 3% +35.5% 18.69 ± 13% perf-sched.wait_time.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity 0.13 ± 15% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__x64_sys_vfork 22.06 ±212% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.aa_sk_perm.security_socket_recvmsg.sock_recvmsg.sock_read_iter 0.88 ± 44% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.aa_sk_perm.security_socket_sendmsg.sock_write_iter.vfs_write 0.56 ±138% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.apparmor_file_alloc_security.security_file_alloc.init_file.alloc_empty_file 0.68 ± 45% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.cgroup_css_set_fork.cgroup_can_fork.copy_process.kernel_clone 0.05 ± 30% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.change_pud_range.isra.0.change_protection_range 0.37 ± 20% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.copy_page_range.dup_mmap.dup_mm.constprop 0.09 ± 45% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_read.acct_collect.do_exit.do_group_exit 0.08 ± 24% -96.6% 0.00 ±223% perf-sched.wait_time.avg.ms.__cond_resched.down_read.walk_component.link_path_walk.part 0.16 ±127% -100.0% 0.00 
perf-sched.wait_time.avg.ms.__cond_resched.down_read.walk_component.path_lookupat.filename_lookup 0.39 ± 18% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write.anon_vma_clone.anon_vma_fork.dup_mmap 0.35 ± 19% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write.anon_vma_fork.dup_mmap.dup_mm 0.20 ± 24% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write.free_pgtables.exit_mmap.mmput 0.07 ± 38% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write.mmap_region.do_mmap.vm_mmap_pgoff 1.74 ± 37% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write.shmem_file_write_iter.vfs_write.ksys_write 0.10 ± 65% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write.unlink_anon_vmas.free_pgtables.exit_mmap 0.06 ± 43% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write.vma_link_file.mmap_region.do_mmap 0.11 ±150% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write.vma_prepare.__split_vma.vma_modify 0.02 ±101% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write_killable.setup_arg_pages.load_elf_binary.search_binary_handler 0.02 ± 52% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_binary 0.04 ± 98% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_interp 0.11 ± 46% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.ksys_mmap_pgoff.do_syscall_64 0.80 ± 41% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.dput.do_linkat.__x64_sys_link.do_syscall_64 0.87 ± 48% -96.5% 0.03 ±223% perf-sched.wait_time.avg.ms.__cond_resched.dput.open_last_lookups.path_openat.do_filp_open 0.13 ±108% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.dput.path_put.exit_fs.do_exit 46.90 ±158% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.dput.path_put.vfs_statx.vfs_fstatat 44.86 ± 63% -95.9% 1.84 ± 36% 
perf-sched.wait_time.avg.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir 0.06 ± 26% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.dput.step_into.link_path_walk.part 0.12 ± 69% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat 0.17 ±114% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.dput.step_into.path_lookupat.filename_lookup 0.15 ± 89% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_lru_noprof.__d_alloc.d_alloc.d_alloc_parallel 85.20 ±219% -100.0% 0.00 ±223% perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_node_noprof.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb 0.65 ± 67% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_node_noprof.dup_task_struct.copy_process.kernel_clone 0.37 ± 9% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.anon_vma_fork.dup_mmap.dup_mm 0.72 ± 21% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.copy_fs_struct.copy_process.kernel_clone 0.05 ± 98% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.commit_merge 0.05 ± 52% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.mmap_region 0.04 ± 59% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vma_modify 0.08 ±114% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.mmput.exec_mmap.begin_new_exec.load_elf_binary 1.03 ± 95% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.mnt_want_write.open_last_lookups.path_openat.do_filp_open 20.87 ± 56% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.mutex_lock.perf_poll.do_poll.constprop 33.59 ±222% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.mutex_lock.pipe_read.vfs_read.ksys_read 0.24 ± 17% -100.0% 0.00 
perf-sched.wait_time.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.__percpu_counter_init_many.mm_init 0.37 ± 13% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.dup_mm 0.22 ± 52% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.put_files_struct.do_exit.do_group_exit.__x64_sys_exit_group 0.14 ± 65% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.remove_vma.exit_mmap.mmput.exit_mm 0.06 ±125% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.mmap_region.do_mmap 1.67 ± 26% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.shmem_get_folio_gfp.shmem_file_read_iter.vfs_read.ksys_read 146.47 ±213% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.shmem_get_folio_gfp.shmem_write_begin.generic_perform_write.shmem_file_write_iter 1.79 ± 18% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.shmem_undo_range.shmem_evict_inode.evict.__dentry_kill 0.01 ± 22% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part 0.09 ± 26% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.super_lock.iterate_supers.ksys_sync.__x64_sys_sync 0.44 ± 76% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.task_work_run.do_exit.do_group_exit.__x64_sys_exit_group 0.09 ± 80% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.unmap_vmas.exit_mmap.mmput.exit_mm 0.15 ± 65% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_complete_munmap_vmas.mmap_region 0.37 ± 16% -100.0% 0.00 perf-sched.wait_time.avg.ms.__cond_resched.uprobe_start_dup_mmap.dup_mmap.dup_mm.constprop 0.02 ± 43% -72.9% 0.01 ± 43% perf-sched.wait_time.avg.ms.d_alloc_parallel.lookup_open.isra.0.open_last_lookups 105.60 ± 3% +16.2% 122.75 ± 3% perf-sched.wait_time.avg.ms.devkmsg_read.vfs_read.ksys_read.do_syscall_64 1.92 ±217% -100.0% 0.00 
perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_exc_page_fault.[unknown] 0.13 ± 37% -98.0% 0.00 ±223% perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown] 3.24 ±213% -100.0% 0.00 perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown] 10.51 ± 7% +29.7% 13.63 perf-sched.wait_time.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64 424.06 ± 11% +49.4% 633.75 ± 6% perf-sched.wait_time.avg.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll 82.47 ± 68% -74.9% 20.66 ± 89% perf-sched.wait_time.avg.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.do_unlinkat 75.52 ± 8% +27.8% 96.53 ± 4% perf-sched.wait_time.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm 0.05 ± 91% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.__handle_mm_fault 0.64 ± 17% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.copy_p4d_range 670.16 ± 70% -99.6% 2.98 ± 39% perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.folio_alloc_mpol_noprof.vma_alloc_folio_noprof 0.36 ± 50% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.get_free_pages_noprof.pgd_alloc 167.66 ±222% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.pte_alloc_one.__pte_alloc 0.50 ± 96% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page.__handle_mm_fault 0.31 ± 74% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__dentry_kill.dput.step_into.link_path_walk 2.62 ± 7% -75.4% 0.64 ±140% perf-sched.wait_time.max.ms.__cond_resched.__fput.__x64_sys_close.do_syscall_64.entry_SYSCALL_64_after_hwframe 1.06 ± 88% -100.0% 0.00 
perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_cache_node_noprof.__get_vm_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node 0.40 ± 52% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_cache_noprof.perf_event_mmap_event.perf_event_mmap.mmap_region 0.03 ± 57% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_noprof.load_elf_phdrs.load_elf_binary.search_binary_handler 0.90 ± 71% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_prepare_creds.prepare_creds 1.61 ± 55% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_task_alloc.copy_process 0.47 ± 85% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__put_anon_vma.unlink_anon_vmas.free_pgtables.exit_mmap 168.98 ±221% -99.9% 0.15 ±213% perf-sched.wait_time.max.ms.__cond_resched.__release_sock.release_sock.__inet_stream_connect.inet_stream_connect 168.89 ±221% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__release_sock.release_sock.tcp_sendmsg.sock_write_iter 1.66 ± 10% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.exit_mmap.mmput 0.10 ±104% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.relocate_vma_down.setup_arg_pages 2.96 ± 13% -46.0% 1.60 ± 65% perf-sched.wait_time.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.vms_clear_ptes.vms_complete_munmap_vmas 1.12 ± 66% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__vmalloc_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node.dup_task_struct 836.76 ± 44% -99.8% 1.53 ± 69% perf-sched.wait_time.max.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__do_sys_clone3 3.03 ± 38% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__x64_sys_vfork 169.67 ±220% -100.0% 0.00 
perf-sched.wait_time.max.ms.__cond_resched.aa_sk_perm.security_socket_recvmsg.sock_recvmsg.sock_read_iter 2.16 ± 41% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.aa_sk_perm.security_socket_sendmsg.sock_write_iter.vfs_write 1.28 ± 82% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.apparmor_file_alloc_security.security_file_alloc.init_file.alloc_empty_file 2.09 ± 35% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.cgroup_css_set_fork.cgroup_can_fork.copy_process.kernel_clone 0.31 ± 39% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.change_pud_range.isra.0.change_protection_range 0.58 ± 29% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.copy_page_range.dup_mmap.dup_mm.constprop 1.19 ± 49% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_read.acct_collect.do_exit.do_group_exit 0.52 ± 41% -99.5% 0.00 ±223% perf-sched.wait_time.max.ms.__cond_resched.down_read.walk_component.link_path_walk.part 0.55 ±138% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_read.walk_component.path_lookupat.filename_lookup 1.11 ± 44% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write.anon_vma_clone.anon_vma_fork.dup_mmap 0.76 ± 35% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write.anon_vma_fork.dup_mmap.dup_mm 2.71 ± 10% -72.7% 0.74 ±141% perf-sched.wait_time.max.ms.__cond_resched.down_write.do_truncate.do_open.path_openat 1.46 ± 31% -92.3% 0.11 ±223% perf-sched.wait_time.max.ms.__cond_resched.down_write.dup_mmap.dup_mm.constprop 1.65 ± 20% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write.free_pgtables.exit_mmap.mmput 0.22 ± 42% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write.mmap_region.do_mmap.vm_mmap_pgoff 2.00 ± 33% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write.shmem_file_write_iter.vfs_write.ksys_write 0.82 ± 51% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write.unlink_anon_vmas.free_pgtables.exit_mmap 0.33 ± 50% 
-100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write.vma_link_file.mmap_region.do_mmap 0.30 ±131% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write.vma_prepare.__split_vma.vma_modify 0.76 ± 47% -99.3% 0.01 ±223% perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.exec_mmap.begin_new_exec.load_elf_binary 0.11 ± 89% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.setup_arg_pages.load_elf_binary.search_binary_handler 0.11 ± 80% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_binary 0.18 ± 79% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_interp 0.41 ± 44% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.ksys_mmap_pgoff.do_syscall_64 2.35 ± 44% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.dput.do_linkat.__x64_sys_link.do_syscall_64 2.19 ± 41% -98.6% 0.03 ±223% perf-sched.wait_time.max.ms.__cond_resched.dput.open_last_lookups.path_openat.do_filp_open 0.35 ±113% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.dput.path_put.exit_fs.do_exit 335.13 ±140% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.dput.path_put.vfs_statx.vfs_fstatat 1005 -99.7% 2.66 ± 20% perf-sched.wait_time.max.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir 169.41 ±219% -99.6% 0.74 ±149% perf-sched.wait_time.max.ms.__cond_resched.dput.shmem_unlink.vfs_unlink.do_unlinkat 0.41 ± 50% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.dput.step_into.link_path_walk.part 0.23 ± 46% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat 0.87 ±117% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.dput.step_into.path_lookupat.filename_lookup 335.93 ±140% -99.5% 1.70 ± 70% perf-sched.wait_time.max.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open 1.72 ± 21% -96.1% 0.07 
±223% perf-sched.wait_time.max.ms.__cond_resched.exit_mmap.mmput.exit_mm.do_exit 0.68 ±134% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_lru_noprof.__d_alloc.d_alloc.d_alloc_parallel 2.47 ± 10% -72.3% 0.68 ±142% perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_lru_noprof.shmem_alloc_inode.alloc_inode.new_inode 169.71 ±220% -100.0% 0.00 ±223% perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_node_noprof.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb 1.46 ± 83% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_node_noprof.dup_task_struct.copy_process.kernel_clone 2.32 ± 33% -80.1% 0.46 ±174% perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_empty_file.path_openat.do_filp_open 1.89 ± 29% -67.6% 0.61 ±141% perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_pid.copy_process.kernel_clone 1.75 ± 57% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.anon_vma_fork.dup_mmap.dup_mm 2.20 ± 14% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.copy_fs_struct.copy_process.kernel_clone 0.14 ± 63% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.commit_merge 0.33 ± 70% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.mmap_region 2.22 ± 30% -84.2% 0.35 ±223% perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.security_inode_alloc.inode_init_always.alloc_inode 0.19 ± 67% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vma_modify 0.16 ±110% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.mmput.exec_mmap.begin_new_exec.load_elf_binary 168.83 ±220% -99.9% 0.23 ±223% perf-sched.wait_time.max.ms.__cond_resched.mnt_want_write.do_unlinkat.__x64_sys_unlink.do_syscall_64 1.30 ± 90% -100.0% 0.00 
perf-sched.wait_time.max.ms.__cond_resched.mnt_want_write.open_last_lookups.path_openat.do_filp_open 49.02 ± 71% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.mutex_lock.perf_poll.do_poll.constprop 167.38 ±223% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.mutex_lock.pipe_read.vfs_read.ksys_read 1.06 ± 52% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.__percpu_counter_init_many.mm_init 0.62 ± 38% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.dup_mm 1.29 ± 28% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.put_files_struct.do_exit.do_group_exit.__x64_sys_exit_group 0.76 ± 67% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.remove_vma.exit_mmap.mmput.exit_mm 0.16 ± 75% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.mmap_region.do_mmap 2.31 ± 10% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.shmem_get_folio_gfp.shmem_file_read_iter.vfs_read.ksys_read 586.41 ±213% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.shmem_get_folio_gfp.shmem_write_begin.generic_perform_write.shmem_file_write_iter 2.50 ± 9% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.shmem_undo_range.shmem_evict_inode.evict.__dentry_kill 2375 ± 7% -98.3% 41.33 ±148% perf-sched.wait_time.max.ms.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 1.97 ± 64% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part 0.34 ± 52% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.super_lock.iterate_supers.ksys_sync.__x64_sys_sync 0.77 ± 60% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.task_work_run.do_exit.do_group_exit.__x64_sys_exit_group 0.74 ± 79% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.unmap_vmas.exit_mmap.mmput.exit_mm 0.37 ± 79% -100.0% 0.00 
perf-sched.wait_time.max.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_complete_munmap_vmas.mmap_region 1.06 ± 67% -100.0% 0.00 perf-sched.wait_time.max.ms.__cond_resched.uprobe_start_dup_mmap.dup_mmap.dup_mm.constprop 2.09 ± 24% -63.9% 0.75 ±141% perf-sched.wait_time.max.ms.__cond_resched.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe 0.17 ± 35% -74.1% 0.04 ±122% perf-sched.wait_time.max.ms.d_alloc_parallel.__lookup_slow.walk_component.path_lookupat 1796 ± 22% -35.8% 1152 ± 25% perf-sched.wait_time.max.ms.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call 167.97 ±222% -100.0% 0.00 perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_exc_page_fault.[unknown] 2200 ± 54% -92.3% 169.14 ±221% perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown] 1.42 ± 67% -99.8% 0.00 ±223% perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown] 170.12 ±219% -99.8% 0.32 ±218% perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown].[unknown] 168.00 ±221% -100.0% 0.00 perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown] 0.52 ± 15% -70.5% 0.15 ±145% perf-sched.wait_time.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.dup_mmap 2744 ± 3% -36.5% 1741 ± 15% perf-sched.wait_time.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm 2864 ± 23% -30.6% 1987 ± 10% perf-sched.wait_time.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm Disclaimer: Results have been estimated based on internal Intel analysis and are provided for informational purposes only. Any difference in system hardware or software design or configuration may affect actual performance. -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Fri, Oct 11, 2024 at 02:24:49PM +0800, Xavier wrote: > The current code subtracts the value of curr from avg_vruntime and avg_load > during runtime. Then, every time avg_vruntime() is called, it adds the > value of curr to the avg_vruntime and avg_load. Afterward, it divides these > and adds min_vruntime to obtain the actual avg_vruntime. > > Analysis of the code indicates that avg_vruntime only changes significantly > during update_curr(), update_min_vruntime(), and when tasks are enqueued or > dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime > only in these specific scenarios. This optimization ensures that accessing > avg_vruntime() does not necessitate a recalculation each time, thereby > enhancing the efficiency of the code. > > There is no need to subtract curr’s load from avg_load during runtime. > Instead, we only need to calculate the incremental change and update > avg_vruntime whenever curr’s time is updated. > > To better represent their functions, rename the original avg_vruntime and > avg_load to tot_vruntime and tot_load, respectively, which more accurately > describes their roles in the computation. > > Signed-off-by: Xavier <xavier_qy@163.com> This makes the code more complicated for no shown benefit.
At 2024-10-11 16:52:01, "Peter Zijlstra" <peterz@infradead.org> wrote: >On Fri, Oct 11, 2024 at 02:24:49PM +0800, Xavier wrote: >> The current code subtracts the value of curr from avg_vruntime and avg_load >> during runtime. Then, every time avg_vruntime() is called, it adds the >> value of curr to the avg_vruntime and avg_load. Afterward, it divides these >> and adds min_vruntime to obtain the actual avg_vruntime. >> >> Analysis of the code indicates that avg_vruntime only changes significantly >> during update_curr(), update_min_vruntime(), and when tasks are enqueued or >> dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime >> only in these specific scenarios. This optimization ensures that accessing >> avg_vruntime() does not necessitate a recalculation each time, thereby >> enhancing the efficiency of the code. >> >> There is no need to subtract curr’s load from avg_load during runtime. >> Instead, we only need to calculate the incremental change and update >> avg_vruntime whenever curr’s time is updated. >> >> To better represent their functions, rename the original avg_vruntime and >> avg_load to tot_vruntime and tot_load, respectively, which more accurately >> describes their roles in the computation. >> >> Signed-off-by: Xavier <xavier_qy@163.com> > >This makes the code more complicated for no shown benefit. Hi Peter, Thank you for reviewing this patch. I would like to address your questions as follows: Code Complexity vs. Understandability: I agree that this modification adds some complexity to the code, but the method of calculation is more straightforward. This patch maintains consistency in how avg_vruntime is added or subtracted relative to load. Specifically, the enqueue and dequeue operations of tasks directly impact the avg_vruntime of cfs_rq, which seems logical. Efficiency Improvements: This approach minimizes unnecessary calculations, thereby enhancing execution efficiency. 
I understand that entity_eligible() and vruntime_eligible() are high-frequency operations. The existing code recalculates curr->vruntime added to cfs_rq->avg_vruntime for each eligibility check. If many tasks in the cfs_rq do not meet the conditions, it leads to multiple redundant calculations within pick_eevdf(). This patch resolves this issue by computing cfs_rq->tot_vruntime only when an update is necessary, allowing vruntime_eligible() to utilize the precomputed value directly. Reducing avg_vruntime Calculations: This patch also reduces the frequency of avg_vruntime evaluations. The original code calls avg_vruntime() every time it's needed, despite many of those calls being redundant when curr->vruntime hasn't changed. This patch ensures that cfs_rq->avg_vruntime is updated only when curr->vruntime or cfs_rq->tot_vruntime changes, allowing subsequent calls to directly access the current value. This greatly decreases the frequency of avg_vruntime calculations. I hope this explanation clarifies the benefits of the patch. I welcome any comments or suggestions. Thank you!
From: xw357835 <xw357835@alibaba-inc.com>
The current code subtracts the value of curr from avg_vruntime and avg_load
during runtime. Then, every time avg_vruntime() is called, it adds the
value of curr to the avg_vruntime and avg_load. Afterward, it divides these
and adds min_vruntime to obtain the actual avg_vruntime.
Analysis of the code indicates that avg_vruntime only changes significantly
during update_curr(), update_min_vruntime(), and when tasks are enqueued or
dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
only in these specific scenarios. This optimization ensures that accessing
avg_vruntime() does not necessitate a recalculation each time, thereby
enhancing the efficiency of the code.
There is no need to subtract curr's load from avg_load during runtime.
Instead, we only need to calculate the incremental change and update
avg_vruntime whenever curr's time is updated.
To better represent their functions, rename the original avg_vruntime and
avg_load to tot_vruntime and tot_load, respectively, which more accurately
describes their roles in the computation.
Signed-off-by: xw357835 <xw357835@alibaba-inc.com>
---
Note:
The patch V2 has been updated based on the latest sched/core branch.
kernel/sched/fair.c | 107 ++++++++++++++++++++++++-------------------
kernel/sched/sched.h | 3 +-
2 files changed, 61 insertions(+), 49 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a621210c9c..fb0434dd0a8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Which we track using:
*
* v0 := cfs_rq->min_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- * \Sum w_i := cfs_rq->avg_load
+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
+ * \Sum w_i := cfs_rq->tot_load
*
* Since min_vruntime is a monotonic increasing variable that closely tracks
* the per-task service, these deltas: (v_i - v), will be in the order of the
@@ -617,14 +617,29 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* As measured, the max (key * weight) value was ~44 bits for a kernel build.
*/
+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
+{
+ s64 tot_vruntime = cfs_rq->tot_vruntime;
+
+ /* sign flips effective floor / ceiling */
+ if (cfs_rq->tot_load) {
+ if (tot_vruntime < 0)
+ tot_vruntime -= (cfs_rq->tot_load - 1);
+ cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
+ } else {
+ cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
+ }
+}
+
static void
avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime += key * weight;
- cfs_rq->avg_load += weight;
+ cfs_rq->tot_vruntime += key * weight;
+ cfs_rq->tot_load += weight;
+ avg_vruntime_update(cfs_rq);
}
static void
@@ -633,17 +648,29 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
unsigned long weight = scale_load_down(se->load.weight);
s64 key = entity_key(cfs_rq, se);
- cfs_rq->avg_vruntime -= key * weight;
- cfs_rq->avg_load -= weight;
+ cfs_rq->tot_vruntime -= key * weight;
+ cfs_rq->tot_load -= weight;
+ avg_vruntime_update(cfs_rq);
+}
+
+static inline
+void avg_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
+{
+ struct sched_entity *curr = cfs_rq->curr;
+ unsigned long weight = scale_load_down(curr->load.weight);
+
+ cfs_rq->tot_vruntime += delta * weight;
+ avg_vruntime_update(cfs_rq);
}
static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void avg_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
{
/*
- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+ * v' = v + d ==> tot_vruntime' = tot_vruntime - d*tot_load
*/
- cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+ cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
+ avg_vruntime_update(cfs_rq);
}
/*
@@ -652,25 +679,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
*/
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
- struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
-
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
-
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
-
- if (load) {
- /* sign flips effective floor / ceiling */
- if (avg < 0)
- avg -= (load - 1);
- avg = div_s64(avg, load);
- }
-
- return cfs_rq->min_vruntime + avg;
+ return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
}
/*
@@ -725,18 +734,8 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
{
- struct sched_entity *curr = cfs_rq->curr;
- s64 avg = cfs_rq->avg_vruntime;
- long load = cfs_rq->avg_load;
-
- if (curr && curr->on_rq) {
- unsigned long weight = scale_load_down(curr->load.weight);
-
- avg += entity_key(cfs_rq, curr) * weight;
- load += weight;
- }
-
- return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+ return cfs_rq->tot_vruntime >=
+ (s64)(vruntime - cfs_rq->min_vruntime) * (s64)cfs_rq->tot_load;
}
int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -752,7 +751,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
*/
s64 delta = (s64)(vruntime - min_vruntime);
if (delta > 0) {
- avg_vruntime_update(cfs_rq, delta);
+ avg_vruntime_update_for_minv(cfs_rq, delta);
min_vruntime = vruntime;
}
return min_vruntime;
@@ -851,7 +850,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
*/
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- avg_vruntime_add(cfs_rq, se);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -862,7 +860,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
- avg_vruntime_sub(cfs_rq, se);
}
struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1219,6 +1216,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
struct rq *rq = rq_of(cfs_rq);
s64 delta_exec;
bool resched;
+ s64 vdelta_exec;
if (unlikely(!curr))
return;
@@ -1227,8 +1225,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (unlikely(delta_exec <= 0))
return;
- curr->vruntime += calc_delta_fair(delta_exec, curr);
+ vdelta_exec = calc_delta_fair(delta_exec, curr);
+ curr->vruntime += vdelta_exec;
resched = update_deadline(cfs_rq, curr);
+ avg_vruntime_update_for_curr(cfs_rq, vdelta_exec);
update_min_vruntime(cfs_rq);
if (entity_is_task(curr)) {
@@ -3883,6 +3883,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
avruntime = avg_vruntime(cfs_rq);
if (!curr)
__dequeue_entity(cfs_rq, se);
+
+ avg_vruntime_sub(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);
@@ -3913,6 +3915,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
if (!curr)
__enqueue_entity(cfs_rq, se);
+ avg_vruntime_add(cfs_rq, se);
+
/*
* The entity's vruntime has been adjusted, so let's check
* whether the rq-wide min_vruntime needs updated too. Since
@@ -5281,7 +5285,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* EEVDF: placement strategy #1 / #2
*/
if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
- struct sched_entity *curr = cfs_rq->curr;
unsigned long load;
lag = se->vlag;
@@ -5338,9 +5341,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* vl_i = (W + w_i)*vl'_i / W
*/
- load = cfs_rq->avg_load;
- if (curr && curr->on_rq)
- load += scale_load_down(curr->load.weight);
+ load = cfs_rq->tot_load;
lag *= load + scale_load_down(se->load.weight);
if (WARN_ON_ONCE(!load))
@@ -5427,6 +5428,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_stats_enqueue_fair(cfs_rq, se, flags);
if (!curr)
__enqueue_entity(cfs_rq, se);
+
+ avg_vruntime_add(cfs_rq, se);
se->on_rq = 1;
if (cfs_rq->nr_running == 1) {
@@ -5530,6 +5533,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
+
+ avg_vruntime_sub(cfs_rq, se);
se->on_rq = 0;
account_entity_dequeue(cfs_rq, se);
@@ -6924,11 +6929,17 @@ requeue_delayed_entity(struct sched_entity *se)
cfs_rq->nr_running--;
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
+ avg_vruntime_sub(cfs_rq, se);
+
se->vlag = 0;
place_entity(cfs_rq, se, 0);
+
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se);
+ avg_vruntime_add(cfs_rq, se);
cfs_rq->nr_running++;
+
+ update_min_vruntime(cfs_rq);
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b1c3588a8f0..7f7c93518c7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,8 +650,9 @@ struct cfs_rq {
unsigned int idle_nr_running; /* SCHED_IDLE */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
+ s64 tot_vruntime;
s64 avg_vruntime;
- u64 avg_load;
+ u64 tot_load;
u64 min_vruntime;
#ifdef CONFIG_SCHED_CORE
--
2.45.2
© 2016 - 2024 Red Hat, Inc.