[PATCH v1] sched/eevdf: Reduce the computation frequency of avg_vruntime

Xavier posted 1 patch 2 months, 2 weeks ago
There is a newer version of this series
kernel/sched/fair.c  | 101 +++++++++++++++++++++++--------------------
kernel/sched/sched.h |   3 +-
2 files changed, 56 insertions(+), 48 deletions(-)
[PATCH v1] sched/eevdf: Reduce the computation frequency of avg_vruntime
Posted by Xavier 2 months, 2 weeks ago
The current code subtracts the value of curr from avg_vruntime and avg_load
during runtime. Then, every time avg_vruntime() is called, it adds the
value of curr to the avg_vruntime and avg_load. Afterward, it divides these
and adds min_vruntime to obtain the actual avg_vruntime.

Analysis of the code indicates that avg_vruntime only changes significantly
during update_curr(), update_min_vruntime(), and when tasks are enqueued or
dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
only in these specific scenarios. This optimization ensures that accessing
avg_vruntime() does not necessitate a recalculation each time, thereby
enhancing the efficiency of the code.

There is no need to subtract curr’s load from avg_load during runtime.
Instead, we only need to calculate the incremental change and update
avg_vruntime whenever curr’s time is updated.

To better represent their functions, rename the original avg_vruntime and
avg_load to tot_vruntime and tot_load, respectively, which more accurately
describes their roles in the computation.

Signed-off-by: Xavier <xavier_qy@163.com>
---
 kernel/sched/fair.c  | 101 +++++++++++++++++++++++--------------------
 kernel/sched/sched.h |   3 +-
 2 files changed, 56 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9057584ec06d..308d4bc3f40d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Which we track using:
  *
  *                    v0 := cfs_rq->min_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- *              \Sum w_i := cfs_rq->avg_load
+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
+ *              \Sum w_i := cfs_rq->tot_load
  *
  * Since min_vruntime is a monotonic increasing variable that closely tracks
  * the per-task service, these deltas: (v_i - v), will be in the order of the
@@ -617,14 +617,29 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  * As measured, the max (key * weight) value was ~44 bits for a kernel build.
  */
+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
+{
+	s64	tot_vruntime = cfs_rq->tot_vruntime;
+
+	/* sign flips effective floor / ceiling */
+	if (cfs_rq->tot_load) {
+		if (tot_vruntime < 0)
+			tot_vruntime -= (cfs_rq->tot_load - 1);
+		cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
+	} else {
+		cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
+	}
+}
+
 static void
 avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	unsigned long weight = scale_load_down(se->load.weight);
 	s64 key = entity_key(cfs_rq, se);
 
-	cfs_rq->avg_vruntime += key * weight;
-	cfs_rq->avg_load += weight;
+	cfs_rq->tot_vruntime += key * weight;
+	cfs_rq->tot_load += weight;
+	avg_vruntime_update(cfs_rq);
 }
 
 static void
@@ -633,17 +648,29 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	unsigned long weight = scale_load_down(se->load.weight);
 	s64 key = entity_key(cfs_rq, se);
 
-	cfs_rq->avg_vruntime -= key * weight;
-	cfs_rq->avg_load -= weight;
+	cfs_rq->tot_vruntime -= key * weight;
+	cfs_rq->tot_load -= weight;
+	avg_vruntime_update(cfs_rq);
+}
+
+static inline
+void avg_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
+{
+	struct sched_entity *curr = cfs_rq->curr;
+	unsigned long weight = scale_load_down(curr->load.weight);
+
+	cfs_rq->tot_vruntime += delta * weight;
+	avg_vruntime_update(cfs_rq);
 }
 
 static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void avg_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
 {
 	/*
-	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+	 * v' = v + d ==> avg_runtime' = tot_runtime - d*tot_load
 	 */
-	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+	cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
+	avg_vruntime_update(cfs_rq);
 }
 
 /*
@@ -652,25 +679,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
  */
 u64 avg_vruntime(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *curr = cfs_rq->curr;
-	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
-
-	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
-
-		avg += entity_key(cfs_rq, curr) * weight;
-		load += weight;
-	}
-
-	if (load) {
-		/* sign flips effective floor / ceiling */
-		if (avg < 0)
-			avg -= (load - 1);
-		avg = div_s64(avg, load);
-	}
-
-	return cfs_rq->min_vruntime + avg;
+	return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
 }
 
 /*
@@ -725,18 +734,10 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
  */
 static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
 {
-	struct sched_entity *curr = cfs_rq->curr;
-	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
+	s64 total = cfs_rq->tot_vruntime;
+	long load = cfs_rq->tot_load;
 
-	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
-
-		avg += entity_key(cfs_rq, curr) * weight;
-		load += weight;
-	}
-
-	return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+	return total >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
 }
 
 int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -752,7 +753,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
 	 */
 	s64 delta = (s64)(vruntime - min_vruntime);
 	if (delta > 0) {
-		avg_vruntime_update(cfs_rq, delta);
+		avg_vruntime_update_for_minv(cfs_rq, delta);
 		min_vruntime = vruntime;
 	}
 	return min_vruntime;
@@ -822,7 +823,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
  */
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	avg_vruntime_add(cfs_rq, se);
 	se->min_vruntime = se->vruntime;
 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
 				__entity_less, &min_vruntime_cb);
@@ -832,7 +832,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
 				  &min_vruntime_cb);
-	avg_vruntime_sub(cfs_rq, se);
 }
 
 struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1157,6 +1156,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 delta_exec;
+	s64 vdelta_exec;
 
 	if (unlikely(!curr))
 		return;
@@ -1165,8 +1165,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (unlikely(delta_exec <= 0))
 		return;
 
-	curr->vruntime += calc_delta_fair(delta_exec, curr);
+	vdelta_exec = calc_delta_fair(delta_exec, curr);
+	curr->vruntime += vdelta_exec;
 	update_deadline(cfs_rq, curr);
+	avg_vruntime_update_for_curr(cfs_rq, vdelta_exec);
 	update_min_vruntime(cfs_rq);
 
 	if (entity_is_task(curr))
@@ -3794,6 +3796,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		avruntime = avg_vruntime(cfs_rq);
 		if (!curr)
 			__dequeue_entity(cfs_rq, se);
+
+		avg_vruntime_sub(cfs_rq, se);
 		update_load_sub(&cfs_rq->load, se->load.weight);
 	}
 	dequeue_load_avg(cfs_rq, se);
@@ -3824,6 +3828,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		if (!curr)
 			__enqueue_entity(cfs_rq, se);
 
+		avg_vruntime_add(cfs_rq, se);
+
 		/*
 		 * The entity's vruntime has been adjusted, so let's check
 		 * whether the rq-wide min_vruntime needs updated too. Since
@@ -5190,7 +5196,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * EEVDF: placement strategy #1 / #2
 	 */
 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
-		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
 
 		lag = se->vlag;
@@ -5247,9 +5252,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		 *
 		 *   vl_i = (W + w_i)*vl'_i / W
 		 */
-		load = cfs_rq->avg_load;
-		if (curr && curr->on_rq)
-			load += scale_load_down(curr->load.weight);
+		load = cfs_rq->tot_load;
 
 		lag *= load + scale_load_down(se->load.weight);
 		if (WARN_ON_ONCE(!load))
@@ -5327,6 +5330,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_stats_enqueue_fair(cfs_rq, se, flags);
 	if (!curr)
 		__enqueue_entity(cfs_rq, se);
+
+	avg_vruntime_add(cfs_rq, se);
 	se->on_rq = 1;
 
 	if (cfs_rq->nr_running == 1) {
@@ -5397,6 +5402,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_entity_lag(cfs_rq, se);
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
+
+	avg_vruntime_sub(cfs_rq, se);
 	se->on_rq = 0;
 	account_entity_dequeue(cfs_rq, se);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4c36cc680361..57f07c56ecda 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -596,8 +596,9 @@ struct cfs_rq {
 	unsigned int		idle_nr_running;   /* SCHED_IDLE */
 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
+	s64			tot_vruntime;
 	s64			avg_vruntime;
-	u64			avg_load;
+	u64			tot_load;
 
 	u64			exec_clock;
 	u64			min_vruntime;
-- 
2.45.2

Re: [PATCH v1] sched/eevdf: Reduce the computation frequency of avg_vruntime
Posted by Xavier 2 months ago

Hello Peter,

Do you have any suggestions regarding the optimization patch I proposed?
Or does anyone else have any thoughts?
Thank you!



At 2024-09-12 17:14:54, "Xavier" <xavier_qy@163.com> wrote:
>The current code subtracts the value of curr from avg_vruntime and avg_load
>during runtime. Then, every time avg_vruntime() is called, it adds the
>value of curr to the avg_vruntime and avg_load. Afterward, it divides these
>and adds min_vruntime to obtain the actual avg_vruntime.
>
>Analysis of the code indicates that avg_vruntime only changes significantly
>during update_curr(), update_min_vruntime(), and when tasks are enqueued or
>dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
>only in these specific scenarios. This optimization ensures that accessing
>avg_vruntime() does not necessitate a recalculation each time, thereby
>enhancing the efficiency of the code.
>
>There is no need to subtract curr’s load from avg_load during runtime.
>Instead, we only need to calculate the incremental change and update
>avg_vruntime whenever curr’s time is updated.
>
>To better represent their functions, rename the original avg_vruntime and
>avg_load to tot_vruntime and tot_load, respectively, which more accurately
>describes their roles in the computation.
>
>Signed-off-by: Xavier <xavier_qy@163.com>
>---
> kernel/sched/fair.c  | 101 +++++++++++++++++++++++--------------------
> kernel/sched/sched.h |   3 +-
> 2 files changed, 56 insertions(+), 48 deletions(-)
>
>diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>index 9057584ec06d..308d4bc3f40d 100644
>--- a/kernel/sched/fair.c
>+++ b/kernel/sched/fair.c
>@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  * Which we track using:
>  *
>  *                    v0 := cfs_rq->min_vruntime
>- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
>- *              \Sum w_i := cfs_rq->avg_load
>+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
>+ *              \Sum w_i := cfs_rq->tot_load
>  *
>  * Since min_vruntime is a monotonic increasing variable that closely tracks
>  * the per-task service, these deltas: (v_i - v), will be in the order of the
>@@ -617,14 +617,29 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  *
>  * As measured, the max (key * weight) value was ~44 bits for a kernel build.
>  */
>+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
>+{
>+	s64	tot_vruntime = cfs_rq->tot_vruntime;
>+
>+	/* sign flips effective floor / ceiling */
>+	if (cfs_rq->tot_load) {
>+		if (tot_vruntime < 0)
>+			tot_vruntime -= (cfs_rq->tot_load - 1);
>+		cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
>+	} else {
>+		cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
>+	}
>+}
>+
> static void
> avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
> {
> 	unsigned long weight = scale_load_down(se->load.weight);
> 	s64 key = entity_key(cfs_rq, se);
> 
>-	cfs_rq->avg_vruntime += key * weight;
>-	cfs_rq->avg_load += weight;
>+	cfs_rq->tot_vruntime += key * weight;
>+	cfs_rq->tot_load += weight;
>+	avg_vruntime_update(cfs_rq);
> }
> 
> static void
>@@ -633,17 +648,29 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
> 	unsigned long weight = scale_load_down(se->load.weight);
> 	s64 key = entity_key(cfs_rq, se);
> 
>-	cfs_rq->avg_vruntime -= key * weight;
>-	cfs_rq->avg_load -= weight;
>+	cfs_rq->tot_vruntime -= key * weight;
>+	cfs_rq->tot_load -= weight;
>+	avg_vruntime_update(cfs_rq);
>+}
>+
>+static inline
>+void avg_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
>+{
>+	struct sched_entity *curr = cfs_rq->curr;
>+	unsigned long weight = scale_load_down(curr->load.weight);
>+
>+	cfs_rq->tot_vruntime += delta * weight;
>+	avg_vruntime_update(cfs_rq);
> }
> 
> static inline
>-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
>+void avg_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
> {
> 	/*
>-	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
>+	 * v' = v + d ==> avg_runtime' = tot_runtime - d*tot_load
> 	 */
>-	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
>+	cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
>+	avg_vruntime_update(cfs_rq);
> }
> 
> /*
>@@ -652,25 +679,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
>  */
> u64 avg_vruntime(struct cfs_rq *cfs_rq)
> {
>-	struct sched_entity *curr = cfs_rq->curr;
>-	s64 avg = cfs_rq->avg_vruntime;
>-	long load = cfs_rq->avg_load;
>-
>-	if (curr && curr->on_rq) {
>-		unsigned long weight = scale_load_down(curr->load.weight);
>-
>-		avg += entity_key(cfs_rq, curr) * weight;
>-		load += weight;
>-	}
>-
>-	if (load) {
>-		/* sign flips effective floor / ceiling */
>-		if (avg < 0)
>-			avg -= (load - 1);
>-		avg = div_s64(avg, load);
>-	}
>-
>-	return cfs_rq->min_vruntime + avg;
>+	return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
> }
> 
> /*
>@@ -725,18 +734,10 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  */
> static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
> {
>-	struct sched_entity *curr = cfs_rq->curr;
>-	s64 avg = cfs_rq->avg_vruntime;
>-	long load = cfs_rq->avg_load;
>+	s64 total = cfs_rq->tot_vruntime;
>+	long load = cfs_rq->tot_load;
> 
>-	if (curr && curr->on_rq) {
>-		unsigned long weight = scale_load_down(curr->load.weight);
>-
>-		avg += entity_key(cfs_rq, curr) * weight;
>-		load += weight;
>-	}
>-
>-	return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
>+	return total >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
> }
> 
> int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
>@@ -752,7 +753,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
> 	 */
> 	s64 delta = (s64)(vruntime - min_vruntime);
> 	if (delta > 0) {
>-		avg_vruntime_update(cfs_rq, delta);
>+		avg_vruntime_update_for_minv(cfs_rq, delta);
> 		min_vruntime = vruntime;
> 	}
> 	return min_vruntime;
>@@ -822,7 +823,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
>  */
> static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
> {
>-	avg_vruntime_add(cfs_rq, se);
> 	se->min_vruntime = se->vruntime;
> 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
> 				__entity_less, &min_vruntime_cb);
>@@ -832,7 +832,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
> {
> 	rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
> 				  &min_vruntime_cb);
>-	avg_vruntime_sub(cfs_rq, se);
> }
> 
> struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
>@@ -1157,6 +1156,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
> {
> 	struct sched_entity *curr = cfs_rq->curr;
> 	s64 delta_exec;
>+	s64 vdelta_exec;
> 
> 	if (unlikely(!curr))
> 		return;
>@@ -1165,8 +1165,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
> 	if (unlikely(delta_exec <= 0))
> 		return;
> 
>-	curr->vruntime += calc_delta_fair(delta_exec, curr);
>+	vdelta_exec = calc_delta_fair(delta_exec, curr);
>+	curr->vruntime += vdelta_exec;
> 	update_deadline(cfs_rq, curr);
>+	avg_vruntime_update_for_curr(cfs_rq, vdelta_exec);
> 	update_min_vruntime(cfs_rq);
> 
> 	if (entity_is_task(curr))
>@@ -3794,6 +3796,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
> 		avruntime = avg_vruntime(cfs_rq);
> 		if (!curr)
> 			__dequeue_entity(cfs_rq, se);
>+
>+		avg_vruntime_sub(cfs_rq, se);
> 		update_load_sub(&cfs_rq->load, se->load.weight);
> 	}
> 	dequeue_load_avg(cfs_rq, se);
>@@ -3824,6 +3828,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
> 		if (!curr)
> 			__enqueue_entity(cfs_rq, se);
> 
>+		avg_vruntime_add(cfs_rq, se);
>+
> 		/*
> 		 * The entity's vruntime has been adjusted, so let's check
> 		 * whether the rq-wide min_vruntime needs updated too. Since
>@@ -5190,7 +5196,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> 	 * EEVDF: placement strategy #1 / #2
> 	 */
> 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
>-		struct sched_entity *curr = cfs_rq->curr;
> 		unsigned long load;
> 
> 		lag = se->vlag;
>@@ -5247,9 +5252,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> 		 *
> 		 *   vl_i = (W + w_i)*vl'_i / W
> 		 */
>-		load = cfs_rq->avg_load;
>-		if (curr && curr->on_rq)
>-			load += scale_load_down(curr->load.weight);
>+		load = cfs_rq->tot_load;
> 
> 		lag *= load + scale_load_down(se->load.weight);
> 		if (WARN_ON_ONCE(!load))
>@@ -5327,6 +5330,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> 	update_stats_enqueue_fair(cfs_rq, se, flags);
> 	if (!curr)
> 		__enqueue_entity(cfs_rq, se);
>+
>+	avg_vruntime_add(cfs_rq, se);
> 	se->on_rq = 1;
> 
> 	if (cfs_rq->nr_running == 1) {
>@@ -5397,6 +5402,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> 	update_entity_lag(cfs_rq, se);
> 	if (se != cfs_rq->curr)
> 		__dequeue_entity(cfs_rq, se);
>+
>+	avg_vruntime_sub(cfs_rq, se);
> 	se->on_rq = 0;
> 	account_entity_dequeue(cfs_rq, se);
> 
>diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>index 4c36cc680361..57f07c56ecda 100644
>--- a/kernel/sched/sched.h
>+++ b/kernel/sched/sched.h
>@@ -596,8 +596,9 @@ struct cfs_rq {
> 	unsigned int		idle_nr_running;   /* SCHED_IDLE */
> 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
> 
>+	s64			tot_vruntime;
> 	s64			avg_vruntime;
>-	u64			avg_load;
>+	u64			tot_load;
> 
> 	u64			exec_clock;
> 	u64			min_vruntime;
>-- 
>2.45.2
[PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime
Posted by Xavier 1 month, 2 weeks ago
The current code subtracts the value of curr from avg_vruntime and avg_load
during runtime. Then, every time avg_vruntime() is called, it adds the
value of curr to the avg_vruntime and avg_load. Afterward, it divides these
and adds min_vruntime to obtain the actual avg_vruntime.

Analysis of the code indicates that avg_vruntime only changes significantly
during update_curr(), update_min_vruntime(), and when tasks are enqueued or
dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
only in these specific scenarios. This optimization ensures that accessing
avg_vruntime() does not necessitate a recalculation each time, thereby
enhancing the efficiency of the code.

There is no need to subtract curr’s load from avg_load during runtime.
Instead, we only need to calculate the incremental change and update
avg_vruntime whenever curr’s time is updated.

To better represent their functions, rename the original avg_vruntime and
avg_load to tot_vruntime and tot_load, respectively, which more accurately
describes their roles in the computation.

Signed-off-by: Xavier <xavier_qy@163.com>
---

Note:
The patch V2 has been updated based on the latest sched/core branch.

 kernel/sched/fair.c  | 107 ++++++++++++++++++++++++-------------------
 kernel/sched/sched.h |   3 +-
 2 files changed, 61 insertions(+), 49 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a621210c9c..fb0434dd0a8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Which we track using:
  *
  *                    v0 := cfs_rq->min_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- *              \Sum w_i := cfs_rq->avg_load
+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
+ *              \Sum w_i := cfs_rq->tot_load
  *
  * Since min_vruntime is a monotonic increasing variable that closely tracks
  * the per-task service, these deltas: (v_i - v), will be in the order of the
@@ -617,14 +617,29 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  * As measured, the max (key * weight) value was ~44 bits for a kernel build.
  */
+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
+{
+	s64	tot_vruntime = cfs_rq->tot_vruntime;
+
+	/* sign flips effective floor / ceiling */
+	if (cfs_rq->tot_load) {
+		if (tot_vruntime < 0)
+			tot_vruntime -= (cfs_rq->tot_load - 1);
+		cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
+	} else {
+		cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
+	}
+}
+
 static void
 avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	unsigned long weight = scale_load_down(se->load.weight);
 	s64 key = entity_key(cfs_rq, se);
 
-	cfs_rq->avg_vruntime += key * weight;
-	cfs_rq->avg_load += weight;
+	cfs_rq->tot_vruntime += key * weight;
+	cfs_rq->tot_load += weight;
+	avg_vruntime_update(cfs_rq);
 }
 
 static void
@@ -633,17 +648,29 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	unsigned long weight = scale_load_down(se->load.weight);
 	s64 key = entity_key(cfs_rq, se);
 
-	cfs_rq->avg_vruntime -= key * weight;
-	cfs_rq->avg_load -= weight;
+	cfs_rq->tot_vruntime -= key * weight;
+	cfs_rq->tot_load -= weight;
+	avg_vruntime_update(cfs_rq);
+}
+
+static inline
+void avg_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
+{
+	struct sched_entity *curr = cfs_rq->curr;
+	unsigned long weight = scale_load_down(curr->load.weight);
+
+	cfs_rq->tot_vruntime += delta * weight;
+	avg_vruntime_update(cfs_rq);
 }
 
 static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void avg_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
 {
 	/*
-	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+	 * v' = v + d ==> avg_runtime' = tot_runtime - d*tot_load
 	 */
-	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+	cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
+	avg_vruntime_update(cfs_rq);
 }
 
 /*
@@ -652,25 +679,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
  */
 u64 avg_vruntime(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *curr = cfs_rq->curr;
-	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
-
-	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
-
-		avg += entity_key(cfs_rq, curr) * weight;
-		load += weight;
-	}
-
-	if (load) {
-		/* sign flips effective floor / ceiling */
-		if (avg < 0)
-			avg -= (load - 1);
-		avg = div_s64(avg, load);
-	}
-
-	return cfs_rq->min_vruntime + avg;
+	return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
 }
 
 /*
@@ -725,18 +734,8 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
  */
 static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
 {
-	struct sched_entity *curr = cfs_rq->curr;
-	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
-
-	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
-
-		avg += entity_key(cfs_rq, curr) * weight;
-		load += weight;
-	}
-
-	return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+	return cfs_rq->tot_vruntime >=
+			(s64)(vruntime - cfs_rq->min_vruntime) * (s64)cfs_rq->tot_load;
 }
 
 int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -752,7 +751,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
 	 */
 	s64 delta = (s64)(vruntime - min_vruntime);
 	if (delta > 0) {
-		avg_vruntime_update(cfs_rq, delta);
+		avg_vruntime_update_for_minv(cfs_rq, delta);
 		min_vruntime = vruntime;
 	}
 	return min_vruntime;
@@ -851,7 +850,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
  */
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	avg_vruntime_add(cfs_rq, se);
 	se->min_vruntime = se->vruntime;
 	se->min_slice = se->slice;
 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -862,7 +860,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
 				  &min_vruntime_cb);
-	avg_vruntime_sub(cfs_rq, se);
 }
 
 struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1219,6 +1216,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	s64 delta_exec;
 	bool resched;
+	s64 vdelta_exec;
 
 	if (unlikely(!curr))
 		return;
@@ -1227,8 +1225,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (unlikely(delta_exec <= 0))
 		return;
 
-	curr->vruntime += calc_delta_fair(delta_exec, curr);
+	vdelta_exec = calc_delta_fair(delta_exec, curr);
+	curr->vruntime += vdelta_exec;
 	resched = update_deadline(cfs_rq, curr);
+	avg_vruntime_update_for_curr(cfs_rq, vdelta_exec);
 	update_min_vruntime(cfs_rq);
 
 	if (entity_is_task(curr)) {
@@ -3883,6 +3883,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		avruntime = avg_vruntime(cfs_rq);
 		if (!curr)
 			__dequeue_entity(cfs_rq, se);
+
+		avg_vruntime_sub(cfs_rq, se);
 		update_load_sub(&cfs_rq->load, se->load.weight);
 	}
 	dequeue_load_avg(cfs_rq, se);
@@ -3913,6 +3915,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		if (!curr)
 			__enqueue_entity(cfs_rq, se);
 
+		avg_vruntime_add(cfs_rq, se);
+
 		/*
 		 * The entity's vruntime has been adjusted, so let's check
 		 * whether the rq-wide min_vruntime needs updated too. Since
@@ -5281,7 +5285,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * EEVDF: placement strategy #1 / #2
 	 */
 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
-		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
 
 		lag = se->vlag;
@@ -5338,9 +5341,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		 *
 		 *   vl_i = (W + w_i)*vl'_i / W
 		 */
-		load = cfs_rq->avg_load;
-		if (curr && curr->on_rq)
-			load += scale_load_down(curr->load.weight);
+		load = cfs_rq->tot_load;
 
 		lag *= load + scale_load_down(se->load.weight);
 		if (WARN_ON_ONCE(!load))
@@ -5427,6 +5428,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_stats_enqueue_fair(cfs_rq, se, flags);
 	if (!curr)
 		__enqueue_entity(cfs_rq, se);
+
+	avg_vruntime_add(cfs_rq, se);
 	se->on_rq = 1;
 
 	if (cfs_rq->nr_running == 1) {
@@ -5530,6 +5533,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
+
+	avg_vruntime_sub(cfs_rq, se);
 	se->on_rq = 0;
 	account_entity_dequeue(cfs_rq, se);
 
@@ -6924,11 +6929,17 @@ requeue_delayed_entity(struct sched_entity *se)
 			cfs_rq->nr_running--;
 			if (se != cfs_rq->curr)
 				__dequeue_entity(cfs_rq, se);
+			avg_vruntime_sub(cfs_rq, se);
+
 			se->vlag = 0;
 			place_entity(cfs_rq, se, 0);
+
 			if (se != cfs_rq->curr)
 				__enqueue_entity(cfs_rq, se);
+			avg_vruntime_add(cfs_rq, se);
 			cfs_rq->nr_running++;
+
+			update_min_vruntime(cfs_rq);
 		}
 	}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b1c3588a8f0..7f7c93518c7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,8 +650,9 @@ struct cfs_rq {
 	unsigned int		idle_nr_running;   /* SCHED_IDLE */
 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
+	s64			tot_vruntime;
 	s64			avg_vruntime;
-	u64			avg_load;
+	u64			tot_load;
 
 	u64			min_vruntime;
 #ifdef CONFIG_SCHED_CORE
-- 
2.45.2

Re: [PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime
Posted by kernel test robot 1 month, 1 week ago

Hello,

kernel test robot noticed a -13.0% regression of will-it-scale.per_thread_ops on:


commit: 538d813df3945cbc9d6a90ba224f36c78c8bb128 ("[PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime")
url: https://github.com/intel-lab-lkp/linux/commits/Xavier/sched-eevdf-Reduce-the-computation-frequency-of-avg_vruntime/20241011-142820
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git 7266f0a6d3bb73f42ea06656d3cc48c7d0386f71
patch link: https://lore.kernel.org/all/20241011062449.998696-1-xavier_qy@163.com/
patch subject: [PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime

testcase: will-it-scale
config: x86_64-rhel-8.3
compiler: gcc-12
test machine: 224 threads 4 sockets Intel(R) Xeon(R) Platinum 8380H CPU @ 2.90GHz (Cooper Lake) with 192G memory
parameters:

	nr_task: 100%
	mode: thread
	test: sched_yield
	cpufreq_governor: performance


In addition to that, the commit also has a significant impact on the following tests:

+------------------+--------------------------------------------------------------------------------------------+
| testcase: change | aim7: aim7.jobs-per-min -16.4% regression                                                  |
| test machine     | 128 threads 2 sockets Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz (Ice Lake) with 256G memory |
| test parameters  | cpufreq_governor=performance                                                               |
|                  | disk=1BRD_48G                                                                              |
|                  | fs=xfs                                                                                     |
|                  | load=3000                                                                                  |
|                  | test=disk_rr                                                                               |
+------------------+--------------------------------------------------------------------------------------------+
| testcase: change | will-it-scale: will-it-scale.per_thread_ops -4.1% regression                               |
| test machine     | 104 threads 2 sockets (Skylake) with 192G memory                                           |
| test parameters  | cpufreq_governor=performance                                                               |
|                  | mode=thread                                                                                |
|                  | nr_task=100%                                                                               |
|                  | test=sched_yield                                                                           |
+------------------+--------------------------------------------------------------------------------------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags:
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202410171258.5873933a-oliver.sang@intel.com


Details are as below:
-------------------------------------------------------------------------------------------------->


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20241017/202410171258.5873933a-oliver.sang@intel.com

=========================================================================================
compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase:
  gcc-12/performance/x86_64-rhel-8.3/thread/100%/debian-12-x86_64-20240206.cgz/lkp-cpl-4sp2/sched_yield/will-it-scale

commit: 
  7266f0a6d3 ("fs/bcachefs: Fix __wait_on_freeing_inode() definition of waitqueue entry")
  538d813df3 ("sched/eevdf: Reduce the computation frequency of avg_vruntime")

7266f0a6d3bb73f4 538d813df3945cbc9d6a90ba224 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
     10.51            -1.4        9.07        mpstat.cpu.all.usr%
     77573 ±  7%     +42.8%     110799 ± 31%  sched_debug.cpu.avg_idle.stddev
 5.853e+08           -13.0%  5.092e+08        will-it-scale.224.threads
   2612822           -13.0%    2273147        will-it-scale.per_thread_ops
 5.853e+08           -13.0%  5.092e+08        will-it-scale.workload
      0.02 ± 46%     -44.2%       0.01 ±  6%  perf-sched.sch_delay.avg.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      2.37 ±190%     -95.5%       0.11 ± 19%  perf-sched.sch_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      1.10 ± 47%    +107.4%       2.29 ± 45%  perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown]
     25.62 ±203%    +590.0%     176.73 ± 90%  perf-sched.wait_time.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
 1.692e+11           -10.9%  1.509e+11        perf-stat.i.branch-instructions
 1.759e+08           -10.1%  1.582e+08        perf-stat.i.branch-misses
      1.13           +10.3%       1.24 ±  2%  perf-stat.i.cpi
 6.786e+11            -9.2%  6.164e+11        perf-stat.i.instructions
      0.90            -9.3%       0.82        perf-stat.i.ipc
      1.11           +10.4%       1.22        perf-stat.overall.cpi
      0.90            -9.4%       0.82        perf-stat.overall.ipc
    350712            +4.5%     366647        perf-stat.overall.path-length
 1.686e+11           -10.9%  1.502e+11        perf-stat.ps.branch-instructions
 1.754e+08           -10.0%  1.579e+08 ±  2%  perf-stat.ps.branch-misses
 6.761e+11            -9.2%   6.14e+11        perf-stat.ps.instructions
 2.053e+14            -9.1%  1.867e+14        perf-stat.total.instructions
      3.82            -2.2        1.60 ±  2%  perf-profile.calltrace.cycles-pp.pick_eevdf.pick_task_fair.pick_next_task_fair.__schedule.schedule
     13.75            -1.6       12.18        perf-profile.calltrace.cycles-pp.clear_bhb_loop.__sched_yield
     10.86            -1.3        9.52        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.__sched_yield
      7.48            -1.0        6.45        perf-profile.calltrace.cycles-pp.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
      1.32 ± 22%      -0.7        0.60 ± 10%  perf-profile.calltrace.cycles-pp.testcase
      5.09            -0.5        4.54        perf-profile.calltrace.cycles-pp.update_rq_clock.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64
      2.22 ±  7%      -0.5        1.69 ± 14%  perf-profile.calltrace.cycles-pp.perf_trace_sched_stat_runtime.update_curr.pick_task_fair.pick_next_task_fair.__schedule
      3.74            -0.5        3.23        perf-profile.calltrace.cycles-pp.update_rq_clock_task.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64
      4.00            -0.4        3.56        perf-profile.calltrace.cycles-pp.sched_clock_cpu.update_rq_clock.__schedule.schedule.__x64_sys_sched_yield
      3.50            -0.4        3.13        perf-profile.calltrace.cycles-pp.sched_clock.sched_clock_cpu.update_rq_clock.__schedule.schedule
      3.31            -0.4        2.95        perf-profile.calltrace.cycles-pp.native_sched_clock.sched_clock.sched_clock_cpu.update_rq_clock.__schedule
      2.78            -0.4        2.43        perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
      2.12            -0.3        1.82        perf-profile.calltrace.cycles-pp.raw_spin_rq_lock_nested.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe
      2.14            -0.3        1.85        perf-profile.calltrace.cycles-pp.syscall_return_via_sysret.__sched_yield
      1.85            -0.3        1.58        perf-profile.calltrace.cycles-pp.update_curr_se.update_curr.pick_task_fair.pick_next_task_fair.__schedule
      2.04            -0.3        1.78        perf-profile.calltrace.cycles-pp._raw_spin_lock.raw_spin_rq_lock_nested.do_sched_yield.__x64_sys_sched_yield.do_syscall_64
      2.02            -0.3        1.76        perf-profile.calltrace.cycles-pp._raw_spin_lock.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64
      2.12            -0.3        1.86        perf-profile.calltrace.cycles-pp.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
      1.13            -0.1        0.98        perf-profile.calltrace.cycles-pp.yield_task_fair.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.95 ±  4%      -0.1        0.82 ±  6%  perf-profile.calltrace.cycles-pp.rcu_note_context_switch.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64
      1.07            -0.1        0.96        perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
      0.77            -0.1        0.66        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_safe_stack.__sched_yield
      1.66            -0.1        1.57        perf-profile.calltrace.cycles-pp.__calc_delta.update_curr.pick_task_fair.pick_next_task_fair.__schedule
     98.53            +0.3       98.82        perf-profile.calltrace.cycles-pp.__sched_yield
     69.38            +3.8       73.16        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.__sched_yield
     67.49            +4.0       71.53        perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
      2.87            +5.0        7.91        perf-profile.calltrace.cycles-pp.update_min_vruntime.update_curr.pick_task_fair.pick_next_task_fair.__schedule
     58.92            +5.1       64.03        perf-profile.calltrace.cycles-pp.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
     51.95            +6.0       57.99        perf-profile.calltrace.cycles-pp.schedule.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
     50.08            +6.3       56.34        perf-profile.calltrace.cycles-pp.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe
     32.18            +8.4       40.56        perf-profile.calltrace.cycles-pp.pick_next_task_fair.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64
     30.20            +8.5       38.74        perf-profile.calltrace.cycles-pp.pick_task_fair.pick_next_task_fair.__schedule.schedule.__x64_sys_sched_yield
     22.44           +11.5       33.93        perf-profile.calltrace.cycles-pp.update_curr.pick_task_fair.pick_next_task_fair.__schedule.schedule
      4.00            -2.3        1.65 ±  2%  perf-profile.children.cycles-pp.pick_eevdf
     13.84            -1.6       12.26        perf-profile.children.cycles-pp.clear_bhb_loop
      6.99            -0.9        6.05        perf-profile.children.cycles-pp.do_sched_yield
      6.97            -0.9        6.10        perf-profile.children.cycles-pp.entry_SYSCALL_64
      5.18            -0.6        4.62        perf-profile.children.cycles-pp.update_rq_clock
      4.22            -0.5        3.68        perf-profile.children.cycles-pp._raw_spin_lock
      2.27 ±  6%      -0.5        1.76 ± 13%  perf-profile.children.cycles-pp.perf_trace_sched_stat_runtime
      3.77            -0.5        3.26        perf-profile.children.cycles-pp.update_rq_clock_task
      4.04            -0.5        3.56        perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack
      4.18            -0.5        3.72        perf-profile.children.cycles-pp.sched_clock_cpu
      3.77            -0.4        3.36        perf-profile.children.cycles-pp.sched_clock
      2.92            -0.4        2.55        perf-profile.children.cycles-pp.syscall_exit_to_user_mode
      3.36            -0.4        3.00        perf-profile.children.cycles-pp.native_sched_clock
      2.38            -0.3        2.08        perf-profile.children.cycles-pp.raw_spin_rq_lock_nested
      2.26            -0.3        1.96        perf-profile.children.cycles-pp.syscall_return_via_sysret
      2.03            -0.3        1.74        perf-profile.children.cycles-pp.update_curr_se
      2.29            -0.3        2.01        perf-profile.children.cycles-pp.x64_sys_call
      0.97 ±  8%      -0.3        0.70 ± 10%  perf-profile.children.cycles-pp.testcase
      1.15            -0.1        1.00        perf-profile.children.cycles-pp.yield_task_fair
      1.01 ±  4%      -0.1        0.87 ±  5%  perf-profile.children.cycles-pp.rcu_note_context_switch
      1.16            -0.1        1.03        perf-profile.children.cycles-pp.syscall_exit_to_user_mode_prepare
      0.80            -0.1        0.68        perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack
      1.69            -0.1        1.60        perf-profile.children.cycles-pp.__calc_delta
      0.50 ±  2%      -0.1        0.43 ±  2%  perf-profile.children.cycles-pp.raw_spin_rq_unlock
      0.47 ±  3%      -0.1        0.40 ±  2%  perf-profile.children.cycles-pp.sched_update_worker
      0.31 ±  3%      -0.0        0.27 ±  5%  perf-profile.children.cycles-pp.arch_scale_cpu_capacity
      0.25            -0.0        0.22 ±  2%  perf-profile.children.cycles-pp.sched_clock_noinstr
     99.24            +0.1       99.32        perf-profile.children.cycles-pp.__sched_yield
     69.66            +3.7       73.41        perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
     67.81            +4.0       71.82        perf-profile.children.cycles-pp.do_syscall_64
     59.71            +5.0       64.72        perf-profile.children.cycles-pp.__x64_sys_sched_yield
      2.92            +5.0        7.94        perf-profile.children.cycles-pp.update_min_vruntime
     51.97            +6.0       58.01        perf-profile.children.cycles-pp.schedule
     50.53            +6.2       56.74        perf-profile.children.cycles-pp.__schedule
     32.38            +8.4       40.74        perf-profile.children.cycles-pp.pick_next_task_fair
     30.70            +8.4       39.14        perf-profile.children.cycles-pp.pick_task_fair
     23.52           +11.0       34.54        perf-profile.children.cycles-pp.update_curr
      3.60            -2.2        1.36 ±  3%  perf-profile.self.cycles-pp.pick_eevdf
     13.76            -1.6       12.18        perf-profile.self.cycles-pp.clear_bhb_loop
      5.71            -0.8        4.90        perf-profile.self.cycles-pp.__sched_yield
      6.12            -0.7        5.40        perf-profile.self.cycles-pp.__schedule
      3.95            -0.6        3.40        perf-profile.self.cycles-pp._raw_spin_lock
      2.20 ±  6%      -0.5        1.70 ± 14%  perf-profile.self.cycles-pp.perf_trace_sched_stat_runtime
      3.56            -0.5        3.07        perf-profile.self.cycles-pp.update_rq_clock_task
      3.92            -0.5        3.44        perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack
      3.07            -0.4        2.67        perf-profile.self.cycles-pp.entry_SYSCALL_64
      2.96            -0.4        2.59        perf-profile.self.cycles-pp.do_syscall_64
      3.28            -0.4        2.92        perf-profile.self.cycles-pp.native_sched_clock
      2.41            -0.3        2.09        perf-profile.self.cycles-pp.do_sched_yield
      3.59            -0.3        3.28        perf-profile.self.cycles-pp.pick_task_fair
      2.25            -0.3        1.96        perf-profile.self.cycles-pp.syscall_return_via_sysret
      1.87            -0.3        1.61        perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
      2.18            -0.3        1.92        perf-profile.self.cycles-pp.x64_sys_call
      1.85            -0.3        1.59        perf-profile.self.cycles-pp.update_curr_se
      1.73            -0.2        1.48        perf-profile.self.cycles-pp.syscall_exit_to_user_mode
      1.44 ±  3%      -0.2        1.26        perf-profile.self.cycles-pp.schedule
      1.02            -0.2        0.86        perf-profile.self.cycles-pp.yield_task_fair
      0.92 ±  4%      -0.1        0.79 ±  5%  perf-profile.self.cycles-pp.rcu_note_context_switch
      0.79            -0.1        0.68        perf-profile.self.cycles-pp.__x64_sys_sched_yield
      0.79            -0.1        0.68        perf-profile.self.cycles-pp.entry_SYSCALL_64_safe_stack
      1.04            -0.1        0.94        perf-profile.self.cycles-pp.update_rq_clock
      1.04            -0.1        0.93        perf-profile.self.cycles-pp.syscall_exit_to_user_mode_prepare
      1.65 ±  2%      -0.1        1.56 ±  3%  perf-profile.self.cycles-pp.pick_next_task_fair
      1.64            -0.1        1.56        perf-profile.self.cycles-pp.__calc_delta
      0.41 ±  5%      -0.1        0.33 ±  5%  perf-profile.self.cycles-pp.testcase
      0.43            -0.1        0.36        perf-profile.self.cycles-pp.raw_spin_rq_unlock
      0.46 ±  3%      -0.1        0.40 ±  2%  perf-profile.self.cycles-pp.sched_update_worker
      0.48            -0.0        0.43        perf-profile.self.cycles-pp.sched_clock_cpu
      0.29            -0.0        0.26 ±  2%  perf-profile.self.cycles-pp.arch_scale_cpu_capacity
      0.26            -0.0        0.23        perf-profile.self.cycles-pp.sched_clock
      2.58            +5.3        7.92        perf-profile.self.cycles-pp.update_min_vruntime
      8.36 ±  5%      +7.1       15.45 ±  2%  perf-profile.self.cycles-pp.update_curr


***************************************************************************************************
lkp-icl-2sp2: 128 threads 2 sockets Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz (Ice Lake) with 256G memory
=========================================================================================
compiler/cpufreq_governor/disk/fs/kconfig/load/rootfs/tbox_group/test/testcase:
  gcc-12/performance/1BRD_48G/xfs/x86_64-rhel-8.3/3000/debian-12-x86_64-20240206.cgz/lkp-icl-2sp2/disk_rr/aim7

commit: 
  7266f0a6d3 ("fs/bcachefs: Fix __wait_on_freeing_inode() definition of waitqueue entry")
  538d813df3 ("sched/eevdf: Reduce the computation frequency of avg_vruntime")

7266f0a6d3bb73f4 538d813df3945cbc9d6a90ba224 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
 3.288e+09 ±  2%     +21.9%  4.008e+09 ±  3%  cpuidle..time
   2225204           +34.7%    2996318        cpuidle..usage
     65110           -10.0%      58593 ±  3%  vmstat.system.cs
    164997           -12.3%     144664 ±  3%  vmstat.system.in
     78.57            +5.4%      82.82        iostat.cpu.idle
     19.74           -20.7%      15.66 ±  3%  iostat.cpu.system
      1.68 ±  2%      -9.3%       1.52        iostat.cpu.user
      0.10 ±  2%      +0.0        0.11 ±  3%  mpstat.cpu.all.soft%
     20.61            -4.5       16.11 ±  3%  mpstat.cpu.all.sys%
      1.75 ±  2%      -0.2        1.55        mpstat.cpu.all.usr%
     38.50           -24.7%      29.00        mpstat.max_utilization_pct
     20.45 ± 35%     -40.8%      12.10 ± 66%  sched_debug.cfs_rq:/.removed.runnable_avg.avg
     93.27 ± 14%     -32.3%      63.15 ± 50%  sched_debug.cfs_rq:/.removed.runnable_avg.stddev
     20.45 ± 35%     -40.8%      12.10 ± 66%  sched_debug.cfs_rq:/.removed.util_avg.avg
     93.27 ± 14%     -32.3%      63.15 ± 50%  sched_debug.cfs_rq:/.removed.util_avg.stddev
    626563           -16.4%     524042        aim7.jobs-per-min
     28.92           +19.5%      34.54        aim7.time.elapsed_time
     28.92           +19.5%      34.54        aim7.time.elapsed_time.max
    106217 ±  2%     -54.4%      48479 ±  4%  aim7.time.involuntary_context_switches
      2520           -20.9%       1994        aim7.time.percent_of_cpu_this_job_got
    693.43            -6.0%     652.11        aim7.time.system_time
    454176            +7.3%     487213        aim7.time.voluntary_context_switches
     24117 ± 12%     +58.8%      38288 ±  9%  meminfo.Active
     19543 ± 14%     +81.6%      35485 ± 10%  meminfo.Active(anon)
      4574 ±  6%     -38.7%       2803 ± 13%  meminfo.Active(file)
    864807           +11.5%     964505        meminfo.Dirty
    866352           +11.8%     968424        meminfo.Inactive(file)
     81362 ±  3%     +24.8%     101538 ± 10%  meminfo.Mapped
     55814 ±  4%     +52.6%      85147 ±  9%  meminfo.Shmem
      2248 ±  9%     -36.3%       1432 ±  8%  numa-meminfo.node0.Active(file)
    435013           +12.2%     488255        numa-meminfo.node0.Dirty
    435838           +12.5%     490301 ±  2%  numa-meminfo.node0.Inactive(file)
     18186 ± 13%     +77.5%      32275 ±  8%  numa-meminfo.node1.Active
     15931 ± 15%     +94.2%      30941 ±  9%  numa-meminfo.node1.Active(anon)
      2254 ± 14%     -40.9%       1333 ± 19%  numa-meminfo.node1.Active(file)
    429153 ±  2%     +10.9%     475856 ±  3%  numa-meminfo.node1.Dirty
    429881 ±  2%     +11.1%     477485 ±  3%  numa-meminfo.node1.Inactive(file)
     48401 ±  5%     +57.8%      76356 ±  8%  numa-meminfo.node1.Shmem
    570.46 ±  4%     -33.5%     379.55 ± 18%  numa-vmstat.node0.nr_active_file
    108480           +13.4%     123017 ±  3%  numa-vmstat.node0.nr_dirty
    108626           +13.7%     123557 ±  3%  numa-vmstat.node0.nr_inactive_file
    574.96 ±  4%     -33.5%     382.41 ± 20%  numa-vmstat.node0.nr_zone_active_file
    108616           +13.8%     123563 ±  3%  numa-vmstat.node0.nr_zone_inactive_file
    108480           +13.4%     123031 ±  3%  numa-vmstat.node0.nr_zone_write_pending
      3992 ± 15%     +88.0%       7505 ± 12%  numa-vmstat.node1.nr_active_anon
    535.60 ±  8%     -43.4%     303.23 ± 20%  numa-vmstat.node1.nr_active_file
    106987 ±  2%     +12.1%     119954 ±  3%  numa-vmstat.node1.nr_dirty
    107155 ±  2%     +12.3%     120335 ±  2%  numa-vmstat.node1.nr_inactive_file
     12109 ±  5%     +55.3%      18803 ± 11%  numa-vmstat.node1.nr_shmem
      3992 ± 15%     +88.0%       7505 ± 12%  numa-vmstat.node1.nr_zone_active_anon
    531.79 ±  7%     -44.0%     297.86 ± 22%  numa-vmstat.node1.nr_zone_active_file
    107160 ±  2%     +12.3%     120329 ±  2%  numa-vmstat.node1.nr_zone_inactive_file
    106991 ±  2%     +12.1%     119949 ±  3%  numa-vmstat.node1.nr_zone_write_pending
      4894 ± 14%     +78.8%       8753 ±  8%  proc-vmstat.nr_active_anon
      1177 ±  7%     -47.3%     620.85 ± 11%  proc-vmstat.nr_active_file
    215885           +12.0%     241890        proc-vmstat.nr_dirty
   1028576            +3.3%    1062036        proc-vmstat.nr_file_pages
    205186            +2.5%     210328        proc-vmstat.nr_inactive_anon
    216249           +12.3%     242846        proc-vmstat.nr_inactive_file
     65985            +2.3%      67471        proc-vmstat.nr_kernel_stack
     20669 ±  3%     +23.9%      25606 ±  7%  proc-vmstat.nr_mapped
     13964 ±  4%     +53.1%      21372 ±  7%  proc-vmstat.nr_shmem
     34179            +2.8%      35137        proc-vmstat.nr_slab_reclaimable
     91842            +1.6%      93353        proc-vmstat.nr_slab_unreclaimable
      4894 ± 14%     +78.8%       8753 ±  8%  proc-vmstat.nr_zone_active_anon
      1177 ±  7%     -47.2%     621.97 ± 11%  proc-vmstat.nr_zone_active_file
    205186            +2.5%     210328        proc-vmstat.nr_zone_inactive_anon
    216249           +12.3%     242846        proc-vmstat.nr_zone_inactive_file
    215886           +12.0%     241890        proc-vmstat.nr_zone_write_pending
      1544 ±206%    +815.2%      14133 ± 47%  proc-vmstat.numa_hint_faults
     93.17 ± 76%   +9039.0%       8514 ± 63%  proc-vmstat.numa_pages_migrated
     33520 ± 95%    +238.4%     113448 ± 18%  proc-vmstat.numa_pte_updates
    401728           +27.3%     511275        proc-vmstat.pgfault
     93.17 ± 76%   +9039.0%       8514 ± 63%  proc-vmstat.pgmigrate_success
    213998           +21.9%     260928 ±  3%  proc-vmstat.pgpgout
     16275 ±  6%     +14.0%      18549 ±  5%  proc-vmstat.pgreuse
      1.32 ±  2%     +10.9%       1.46 ±  2%  perf-stat.i.MPKI
  1.66e+10 ±  2%     -15.6%  1.401e+10        perf-stat.i.branch-instructions
      1.82 ±  2%      -0.2        1.58 ±  4%  perf-stat.i.branch-miss-rate%
  63877077 ±  3%     -15.6%   53893091 ±  2%  perf-stat.i.branch-misses
     20.49            +1.8       22.29        perf-stat.i.cache-miss-rate%
 1.364e+08 ±  2%     -12.8%  1.189e+08        perf-stat.i.cache-misses
 5.295e+08 ±  2%     -15.6%  4.469e+08        perf-stat.i.cache-references
     70732 ±  2%     -10.9%      63025 ±  2%  perf-stat.i.context-switches
 6.905e+10 ±  2%     -18.9%  5.602e+10        perf-stat.i.cpu-cycles
      2264 ±  2%     -31.3%       1556 ±  3%  perf-stat.i.cpu-migrations
 7.571e+10 ±  2%     -15.7%  6.382e+10        perf-stat.i.instructions
     36.23 ± 23%     -50.3%      18.01 ± 50%  perf-stat.i.major-faults
     11250 ±  4%      +7.1%      12053 ±  3%  perf-stat.i.minor-faults
     11286 ±  4%      +7.0%      12071 ±  3%  perf-stat.i.page-faults
      1.81            +3.2%       1.86        perf-stat.overall.MPKI
     25.78            +0.8       26.62        perf-stat.overall.cache-miss-rate%
      0.91            -3.8%       0.88        perf-stat.overall.cpi
    505.72            -6.8%     471.32        perf-stat.overall.cycles-between-cache-misses
      1.09            +3.9%       1.14        perf-stat.overall.ipc
 1.574e+10 ±  2%     -13.9%  1.355e+10 ±  2%  perf-stat.ps.branch-instructions
  59346827 ±  3%     -12.8%   51745536        perf-stat.ps.branch-misses
 1.296e+08 ±  3%     -11.2%  1.151e+08 ±  2%  perf-stat.ps.cache-misses
 5.028e+08 ±  3%     -14.0%  4.324e+08        perf-stat.ps.cache-references
     67073 ±  3%      -9.1%      60976 ±  2%  perf-stat.ps.context-switches
 6.555e+10 ±  2%     -17.2%  5.425e+10 ±  2%  perf-stat.ps.cpu-cycles
      2163 ±  2%     -29.4%       1527 ±  2%  perf-stat.ps.cpu-migrations
 7.176e+10 ±  2%     -14.0%  6.172e+10 ±  2%  perf-stat.ps.instructions
     10680 ±  4%     +11.3%      11882 ±  2%  perf-stat.ps.minor-faults
     10719 ±  4%     +11.1%      11907 ±  2%  perf-stat.ps.page-faults
      0.01 ± 30%    +511.8%       0.05 ± 19%  perf-sched.sch_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
      0.00 ±223%   +1575.0%       0.01 ± 28%  perf-sched.sch_delay.avg.ms.__cond_resched.dput.__fput.__x64_sys_close.do_syscall_64
      0.00 ±145%   +1020.0%       0.01 ± 28%  perf-sched.sch_delay.avg.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open
      0.00 ±223%   +1133.3%       0.01 ±  7%  perf-sched.sch_delay.avg.ms.__cond_resched.task_work_run.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.00 ± 62%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.do_nanosleep.hrtimer_nanosleep.__x64_sys_nanosleep.do_syscall_64
      0.00 ± 17%     +69.6%       0.01 ±  7%  perf-sched.sch_delay.avg.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.00 ±145%   +1180.0%       0.01 ± 73%  perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_exc_page_fault.[unknown].[unknown]
      0.01 ± 33%     -70.8%       0.00 ± 20%  perf-sched.sch_delay.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
      0.01 ±  9%     -45.5%       0.00 ± 72%  perf-sched.sch_delay.avg.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm
      0.00 ± 10%     +32.1%       0.01 ±  6%  perf-sched.sch_delay.avg.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
      0.01 ± 15%     +91.4%       0.01 ± 43%  perf-sched.sch_delay.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork
      0.01 ±  8%    +100.0%       0.01 ± 37%  perf-sched.sch_delay.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
      0.01 ± 11%     +38.5%       0.01        perf-sched.sch_delay.avg.ms.schedule_timeout.xfsaild.kthread.ret_from_fork
      0.00 ±142%    +369.2%       0.01 ± 15%  perf-sched.sch_delay.max.ms.__cond_resched.__dentry_kill.shrink_dentry_list.shrink_dcache_parent.d_invalidate
      0.73 ± 99%    +286.5%       2.84 ± 13%  perf-sched.sch_delay.max.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
      0.00 ±223%   +2900.0%       0.02 ± 44%  perf-sched.sch_delay.max.ms.__cond_resched.dput.__fput.__x64_sys_close.do_syscall_64
      0.00 ±145%   +1640.0%       0.01 ± 40%  perf-sched.sch_delay.max.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open
      0.00 ±143%    +350.0%       0.01 ±  7%  perf-sched.sch_delay.max.ms.__cond_resched.process_one_work.worker_thread.kthread.ret_from_fork
      0.00 ±223%   +1750.0%       0.02 ±  5%  perf-sched.sch_delay.max.ms.__cond_resched.task_work_run.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.01 ± 88%    -100.0%       0.00        perf-sched.sch_delay.max.ms.do_nanosleep.hrtimer_nanosleep.__x64_sys_nanosleep.do_syscall_64
      0.01 ± 11%   +2974.3%       0.36 ± 40%  perf-sched.sch_delay.max.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
      0.01 ± 11%    +100.0%       0.01 ± 14%  perf-sched.sch_delay.max.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.00 ±145%   +1480.0%       0.01 ± 65%  perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_exc_page_fault.[unknown].[unknown]
      0.01 ± 18%   +5433.3%       0.58 ±133%  perf-sched.sch_delay.max.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll
      0.01 ± 10%    +516.3%       0.05 ± 88%  perf-sched.sch_delay.max.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
      0.01 ± 16%    +708.9%       0.06 ±144%  perf-sched.sch_delay.max.ms.schedule_timeout.kcompactd.kthread.ret_from_fork
      0.01 ± 21%  +43222.4%       4.19 ±127%  perf-sched.sch_delay.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
      0.04 ±102%  +79610.1%      30.29 ±221%  perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      0.01 ±  8%    +378.5%       0.05 ± 13%  perf-sched.total_sch_delay.average.ms
      4.82 ± 31%   +3815.7%     188.77 ±  2%  perf-sched.total_sch_delay.max.ms
      2004 ± 24%  +11113.1%     224730        perf-sched.total_wait_and_delay.count.ms
      1367 ± 41%    +246.4%       4735 ±  3%  perf-sched.total_wait_and_delay.max.ms
      1367 ± 41%    +246.4%       4735 ±  3%  perf-sched.total_wait_time.max.ms
      3.25 ± 53%    -100.0%       0.00        perf-sched.wait_and_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
    166.61 ±141%    +349.9%     749.60 ± 10%  perf-sched.wait_and_delay.avg.ms.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    111.12 ±141%    +570.0%     744.49 ± 10%  perf-sched.wait_and_delay.avg.ms.do_nanosleep.hrtimer_nanosleep.common_nsleep.__x64_sys_clock_nanosleep
      0.29 ±  4%    +457.1%       1.64 ± 14%  perf-sched.wait_and_delay.avg.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
     77.78 ±106%    +198.6%     232.23 ±  3%  perf-sched.wait_and_delay.avg.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm
    130.49 ± 36%    +216.7%     413.24 ±  7%  perf-sched.wait_and_delay.avg.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait
      0.47 ±  5%    -100.0%       0.00        perf-sched.wait_and_delay.avg.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
    252.00 ± 50%     +80.0%     453.54        perf-sched.wait_and_delay.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork
     19.90 ± 18%     -79.4%       4.10        perf-sched.wait_and_delay.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
     48.98          -100.0%       0.00        perf-sched.wait_and_delay.avg.ms.schedule_timeout.xfsaild.kthread.ret_from_fork
    239.32 ± 81%     -97.8%       5.28 ±  9%  perf-sched.wait_and_delay.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
    586.67 ± 25%    -100.0%       0.00        perf-sched.wait_and_delay.count.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
      1.33 ±141%    +650.0%      10.00        perf-sched.wait_and_delay.count.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      1.00 ±141%    +416.7%       5.17 ±  7%  perf-sched.wait_and_delay.count.do_nanosleep.hrtimer_nanosleep.common_nsleep.__x64_sys_clock_nanosleep
     31.17 ± 11%    -100.0%       0.00        perf-sched.wait_and_delay.count.do_task_dead.do_exit.__x64_sys_exit.x64_sys_call.do_syscall_64
     66.17 ± 14%   +4026.4%       2730 ± 19%  perf-sched.wait_and_delay.count.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call
     51.67 ± 21%   +4399.7%       2324 ± 18%  perf-sched.wait_and_delay.count.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
      3.00 ±107%    +522.2%      18.67 ±  5%  perf-sched.wait_and_delay.count.irq_thread.kthread.ret_from_fork.ret_from_fork_asm
    169.50 ± 22%   +1190.6%       2187 ±  4%  perf-sched.wait_and_delay.count.pipe_read.vfs_read.ksys_read.do_syscall_64
      3.83 ±102%   +3339.1%     131.83 ±  3%  perf-sched.wait_and_delay.count.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll
     31.50 ± 25%    -100.0%       0.00        perf-sched.wait_and_delay.count.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
      4.67 ± 58%    +328.6%      20.00        perf-sched.wait_and_delay.count.schedule_timeout.kcompactd.kthread.ret_from_fork
     64.17 ± 37%   +1794.8%       1215        perf-sched.wait_and_delay.count.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
     27.50 ± 38%    -100.0%       0.00        perf-sched.wait_and_delay.count.schedule_timeout.xfsaild.kthread.ret_from_fork
    647.50 ± 30%    +380.3%       3109 ±  5%  perf-sched.wait_and_delay.count.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      1.00 ±223%  +1.8e+05%       1760        perf-sched.wait_and_delay.count.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    217.17 ± 32%  +26170.7%      57051        perf-sched.wait_and_delay.count.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      1249 ± 51%    -100.0%       0.00        perf-sched.wait_and_delay.max.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
      1.30 ± 17%  +28306.6%     368.62 ± 31%  perf-sched.wait_and_delay.max.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
    283.33 ±104%    +246.8%     982.65 ±  3%  perf-sched.wait_and_delay.max.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm
    336.68 ±140%    +957.4%       3559 ± 50%  perf-sched.wait_and_delay.max.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
    333.67 ±141%   +1049.9%       3837 ± 38%  perf-sched.wait_and_delay.max.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll
      1.91          -100.0%       0.00        perf-sched.wait_and_delay.max.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
    329.52 ± 29%     -82.1%      59.00 ±104%  perf-sched.wait_and_delay.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
     52.00          -100.0%       0.00        perf-sched.wait_and_delay.max.ms.schedule_timeout.xfsaild.kthread.ret_from_fork
    975.69 ± 20%    +150.2%       2440 ±  3%  perf-sched.wait_and_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
    154.83 ±223%    +458.4%     864.51 ± 35%  perf-sched.wait_and_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      1312 ± 47%    +222.7%       4234 ±  8%  perf-sched.wait_and_delay.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.44 ±223%  +25614.4%     112.42 ± 18%  perf-sched.wait_time.avg.ms.__cond_resched.dput.__fput.__x64_sys_close.do_syscall_64
      0.00 ±223%  +1.7e+06%      54.23 ±119%  perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_empty_file.path_openat.do_filp_open
    166.61 ±141%    +349.9%     749.60 ± 10%  perf-sched.wait_time.avg.ms.__x64_sys_pause.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
    111.12 ±141%    +570.0%     744.48 ± 10%  perf-sched.wait_time.avg.ms.do_nanosleep.hrtimer_nanosleep.common_nsleep.__x64_sys_clock_nanosleep
      0.29 ±  4%    +467.6%       1.63 ± 14%  perf-sched.wait_time.avg.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
     94.46 ± 72%    +145.9%     232.23 ±  3%  perf-sched.wait_time.avg.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.01 ±144%  +2.2e+06%     127.86 ±  6%  perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown]
      0.00 ±223%   +4333.3%       0.02 ± 27%  perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown]
    130.47 ± 36%    +216.7%     413.23 ±  7%  perf-sched.wait_time.avg.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait
      0.46 ±  5%     +53.4%       0.71        perf-sched.wait_time.avg.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
    252.00 ± 50%     +80.0%     453.53        perf-sched.wait_time.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork
     19.89 ± 18%     -79.5%       4.09        perf-sched.wait_time.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
    239.29 ± 81%     -97.8%       5.25 ±  9%  perf-sched.wait_time.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.44 ±223%  +39762.2%     174.26 ±  8%  perf-sched.wait_time.max.ms.__cond_resched.dput.__fput.__x64_sys_close.do_syscall_64
      0.00 ±223%  +2.8e+06%      88.88 ± 99%  perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_empty_file.path_openat.do_filp_open
      1.29 ± 17%  +28522.9%     368.61 ± 31%  perf-sched.wait_time.max.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
    349.88 ± 67%    +180.9%     982.64 ±  3%  perf-sched.wait_time.max.ms.irq_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.02 ±191%  +1.1e+06%     187.74        perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown]
      0.00 ±223%   +6166.7%       0.03 ± 47%  perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown]
    336.67 ±140%    +957.4%       3559 ± 50%  perf-sched.wait_time.max.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
      4.66 ± 10%     -42.8%       2.66 ± 70%  perf-sched.wait_time.max.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm
    333.67 ±141%   +1049.9%       3836 ± 38%  perf-sched.wait_time.max.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll
      1.91           +40.7%       2.69 ±  2%  perf-sched.wait_time.max.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
    329.51 ± 29%     -82.6%      57.33 ±109%  perf-sched.wait_time.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
    975.68 ± 20%    +150.2%       2440 ±  3%  perf-sched.wait_time.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
    154.84 ±223%    +458.3%     864.50 ± 35%  perf-sched.wait_time.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      1312 ± 47%    +222.7%       4233 ±  8%  perf-sched.wait_time.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      7.94            -1.8        6.10 ±  3%  perf-profile.calltrace.cycles-pp.down_write.open_last_lookups.path_openat.do_filp_open.do_sys_openat2
      7.94            -1.8        6.10 ±  3%  perf-profile.calltrace.cycles-pp.rwsem_down_write_slowpath.down_write.open_last_lookups.path_openat.do_filp_open
      9.08            -1.8        7.25 ±  3%  perf-profile.calltrace.cycles-pp.open_last_lookups.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat
      9.20            -1.8        7.36 ±  3%  perf-profile.calltrace.cycles-pp.do_sys_openat2.__x64_sys_creat.do_syscall_64.entry_SYSCALL_64_after_hwframe.creat64
      9.16            -1.8        7.33 ±  3%  perf-profile.calltrace.cycles-pp.path_openat.do_filp_open.do_sys_openat2.__x64_sys_creat.do_syscall_64
      9.16            -1.8        7.33 ±  3%  perf-profile.calltrace.cycles-pp.do_filp_open.do_sys_openat2.__x64_sys_creat.do_syscall_64.entry_SYSCALL_64_after_hwframe
      9.20            -1.8        7.37 ±  3%  perf-profile.calltrace.cycles-pp.__x64_sys_creat.do_syscall_64.entry_SYSCALL_64_after_hwframe.creat64
      9.26            -1.8        7.44 ±  3%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.creat64
      9.26            -1.8        7.44 ±  3%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.creat64
      9.28            -1.8        7.46 ±  3%  perf-profile.calltrace.cycles-pp.creat64
      7.29            -1.8        5.51 ±  4%  perf-profile.calltrace.cycles-pp.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.open_last_lookups.path_openat
      8.80            -1.6        7.17 ±  3%  perf-profile.calltrace.cycles-pp.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink
      8.80            -1.6        7.17 ±  3%  perf-profile.calltrace.cycles-pp.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink
      8.86            -1.6        7.24 ±  3%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.unlink
      8.86            -1.6        7.24 ±  3%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.unlink
      8.89            -1.6        7.27 ±  3%  perf-profile.calltrace.cycles-pp.unlink
      7.94            -1.6        6.33 ±  3%  perf-profile.calltrace.cycles-pp.down_write.do_unlinkat.__x64_sys_unlink.do_syscall_64.entry_SYSCALL_64_after_hwframe
      7.94            -1.6        6.32 ±  3%  perf-profile.calltrace.cycles-pp.rwsem_down_write_slowpath.down_write.do_unlinkat.__x64_sys_unlink.do_syscall_64
      6.42            -1.6        4.85 ±  4%  perf-profile.calltrace.cycles-pp.osq_lock.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.open_last_lookups
      7.28            -1.5        5.74 ±  4%  perf-profile.calltrace.cycles-pp.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.do_unlinkat.__x64_sys_unlink
      6.46            -1.4        5.09 ±  4%  perf-profile.calltrace.cycles-pp.osq_lock.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.do_unlinkat
      0.85            -0.2        0.65        perf-profile.calltrace.cycles-pp.rwsem_spin_on_owner.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.open_last_lookups
      0.79            -0.2        0.63        perf-profile.calltrace.cycles-pp.rwsem_spin_on_owner.rwsem_optimistic_spin.rwsem_down_write_slowpath.down_write.do_unlinkat
      1.75            -0.2        1.60 ±  4%  perf-profile.calltrace.cycles-pp.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      1.78            -0.2        1.63 ±  4%  perf-profile.calltrace.cycles-pp.kthread.ret_from_fork.ret_from_fork_asm
      1.78            -0.1        1.64 ±  4%  perf-profile.calltrace.cycles-pp.ret_from_fork.ret_from_fork_asm
      1.78            -0.1        1.64 ±  4%  perf-profile.calltrace.cycles-pp.ret_from_fork_asm
      1.31            -0.1        1.23        perf-profile.calltrace.cycles-pp.xfs_inactive.xfs_inodegc_worker.process_one_work.worker_thread.kthread
      1.33            -0.1        1.25        perf-profile.calltrace.cycles-pp.xfs_inodegc_worker.process_one_work.worker_thread.kthread.ret_from_fork
      0.96            -0.1        0.88        perf-profile.calltrace.cycles-pp.xfs_inactive_ifree.xfs_inactive.xfs_inodegc_worker.process_one_work.worker_thread
      0.69            -0.1        0.63        perf-profile.calltrace.cycles-pp.xfs_ifree.xfs_inactive_ifree.xfs_inactive.xfs_inodegc_worker.process_one_work
      0.65            -0.1        0.60        perf-profile.calltrace.cycles-pp.xfs_difree.xfs_inode_uninit.xfs_ifree.xfs_inactive_ifree.xfs_inactive
      0.68            -0.1        0.62        perf-profile.calltrace.cycles-pp.xfs_inode_uninit.xfs_ifree.xfs_inactive_ifree.xfs_inactive.xfs_inodegc_worker
      0.70            +0.0        0.72        perf-profile.calltrace.cycles-pp.xfs_ilock.xfs_file_buffered_read.xfs_file_read_iter.vfs_read.ksys_read
      0.51            +0.0        0.53        perf-profile.calltrace.cycles-pp.down_read.xfs_ilock.xfs_file_buffered_read.xfs_file_read_iter.vfs_read
      1.03            +0.0        1.06        perf-profile.calltrace.cycles-pp.__alloc_pages_noprof.alloc_pages_mpol_noprof.folio_alloc_noprof.__filemap_get_folio.iomap_write_begin
      0.51            +0.0        0.54 ±  2%  perf-profile.calltrace.cycles-pp.down_write.xfs_ilock_for_iomap.xfs_buffered_write_iomap_begin.iomap_iter.iomap_file_buffered_write
      0.54            +0.0        0.57        perf-profile.calltrace.cycles-pp.xas_load.filemap_get_read_batch.filemap_get_pages.filemap_read.xfs_file_buffered_read
      0.61            +0.0        0.64        perf-profile.calltrace.cycles-pp.xfs_iunlock.xfs_file_buffered_write.vfs_write.ksys_write.do_syscall_64
      1.19            +0.0        1.22        perf-profile.calltrace.cycles-pp.alloc_pages_mpol_noprof.folio_alloc_noprof.__filemap_get_folio.iomap_write_begin.iomap_write_iter
      1.26            +0.0        1.29        perf-profile.calltrace.cycles-pp.folio_alloc_noprof.__filemap_get_folio.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write
      0.78            +0.0        0.82        perf-profile.calltrace.cycles-pp.xfs_ilock_for_iomap.xfs_buffered_write_iomap_begin.iomap_iter.iomap_file_buffered_write.xfs_file_buffered_write
      1.00            +0.0        1.04        perf-profile.calltrace.cycles-pp.filemap_get_entry.__filemap_get_folio.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write
      1.04            +0.0        1.07        perf-profile.calltrace.cycles-pp.fault_in_readable.fault_in_iov_iter_readable.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write
      1.19            +0.0        1.23        perf-profile.calltrace.cycles-pp.fault_in_iov_iter_readable.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write
      1.53            +0.0        1.57        perf-profile.calltrace.cycles-pp.clear_bhb_loop.read
      1.11            +0.0        1.15        perf-profile.calltrace.cycles-pp.iomap_iter_advance.iomap_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write
      1.02            +0.0        1.06        perf-profile.calltrace.cycles-pp.ksys_lseek.do_syscall_64.entry_SYSCALL_64_after_hwframe.llseek
      1.60            +0.0        1.64        perf-profile.calltrace.cycles-pp.clear_bhb_loop.write
      1.43            +0.0        1.48        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.llseek
      1.64            +0.1        1.70        perf-profile.calltrace.cycles-pp.copy_page_from_iter_atomic.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write
      1.54            +0.1        1.59        perf-profile.calltrace.cycles-pp.clear_bhb_loop.llseek
      0.86            +0.1        0.91        perf-profile.calltrace.cycles-pp.iomap_set_range_uptodate.__iomap_write_begin.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write
      1.89            +0.1        1.95        perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.llseek
      1.99            +0.1        2.04        perf-profile.calltrace.cycles-pp.filemap_get_read_batch.filemap_get_pages.filemap_read.xfs_file_buffered_read.xfs_file_read_iter
      2.34            +0.1        2.40        perf-profile.calltrace.cycles-pp.filemap_get_pages.filemap_read.xfs_file_buffered_read.xfs_file_read_iter.vfs_read
      2.14            +0.1        2.21        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.llseek
      1.57            +0.1        1.64        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.write
      1.62            +0.1        1.70 ±  2%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.read
      2.37            +0.1        2.51        perf-profile.calltrace.cycles-pp.iomap_set_range_uptodate.iomap_write_end.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write
      4.01            +0.2        4.16        perf-profile.calltrace.cycles-pp.xfs_buffered_write_iomap_begin.iomap_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write
      6.70            +0.2        6.85        perf-profile.calltrace.cycles-pp.__filemap_get_folio.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write
      2.68            +0.2        2.85        perf-profile.calltrace.cycles-pp.memset_orig.zero_user_segments.__iomap_write_begin.iomap_write_begin.iomap_write_iter
      2.78            +0.2        2.95        perf-profile.calltrace.cycles-pp.zero_user_segments.__iomap_write_begin.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write
      6.22            +0.2        6.41        perf-profile.calltrace.cycles-pp.llseek
      4.69            +0.2        4.89        perf-profile.calltrace.cycles-pp.iomap_write_end.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write
      5.90            +0.2        6.11        perf-profile.calltrace.cycles-pp.iomap_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write.ksys_write
      4.23            +0.2        4.47        perf-profile.calltrace.cycles-pp.__iomap_write_begin.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write
      4.88            +0.3        5.15        perf-profile.calltrace.cycles-pp._copy_to_iter.copy_page_to_iter.filemap_read.xfs_file_buffered_read.xfs_file_read_iter
      5.10            +0.3        5.38        perf-profile.calltrace.cycles-pp.copy_page_to_iter.filemap_read.xfs_file_buffered_read.xfs_file_read_iter.vfs_read
     14.66            +0.4       15.06        perf-profile.calltrace.cycles-pp.filemap_read.xfs_file_buffered_read.xfs_file_read_iter.vfs_read.ksys_read
      1.01 ±  2%      +0.4        1.42 ±  2%  perf-profile.calltrace.cycles-pp.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt.acpi_idle_enter.cpuidle_enter_state
      0.08 ±223%      +0.4        0.51        perf-profile.calltrace.cycles-pp.xfs_break_layouts.xfs_file_write_checks.xfs_file_buffered_write.vfs_write.ksys_write
     11.72            +0.4       12.16        perf-profile.calltrace.cycles-pp.iomap_write_begin.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write
     16.26            +0.4       16.70        perf-profile.calltrace.cycles-pp.xfs_file_buffered_read.xfs_file_read_iter.vfs_read.ksys_read.do_syscall_64
     16.66            +0.4       17.10        perf-profile.calltrace.cycles-pp.xfs_file_read_iter.vfs_read.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe
     18.40            +0.5       18.89        perf-profile.calltrace.cycles-pp.vfs_read.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
     19.10            +0.5       19.62        perf-profile.calltrace.cycles-pp.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
      1.37            +0.5        1.90 ±  2%  perf-profile.calltrace.cycles-pp.acpi_safe_halt.acpi_idle_enter.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call
      0.17 ±141%      +0.5        0.70 ±  5%  perf-profile.calltrace.cycles-pp.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt.acpi_idle_enter
      0.00            +0.5        0.53        perf-profile.calltrace.cycles-pp.handle_softirqs.__irq_exit_rcu.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt
      0.00            +0.6        0.55        perf-profile.calltrace.cycles-pp.__irq_exit_rcu.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt.acpi_idle_enter
     20.13            +0.6       20.68        perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
     20.38            +0.6       20.94        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.read
     24.42            +0.7       25.08        perf-profile.calltrace.cycles-pp.read
      0.00            +0.7        0.68 ±  5%  perf-profile.calltrace.cycles-pp.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.acpi_safe_halt
     22.42            +0.8       23.27        perf-profile.calltrace.cycles-pp.iomap_write_iter.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write.ksys_write
      2.11            +0.9        2.99        perf-profile.calltrace.cycles-pp.acpi_idle_enter.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle
      2.16            +0.9        3.08        perf-profile.calltrace.cycles-pp.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry
      2.17            +0.9        3.09        perf-profile.calltrace.cycles-pp.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary
      2.32            +1.0        3.29        perf-profile.calltrace.cycles-pp.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary.common_startup_64
      2.52            +1.0        3.51        perf-profile.calltrace.cycles-pp.cpu_startup_entry.start_secondary.common_startup_64
      2.52            +1.0        3.51        perf-profile.calltrace.cycles-pp.start_secondary.common_startup_64
      2.51            +1.0        3.50        perf-profile.calltrace.cycles-pp.do_idle.cpu_startup_entry.start_secondary.common_startup_64
      2.54            +1.0        3.54        perf-profile.calltrace.cycles-pp.common_startup_64
      2.06 ±  2%      +1.1        3.13        perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.acpi_safe_halt.acpi_idle_enter.cpuidle_enter_state.cpuidle_enter
     29.32            +1.1       30.42        perf-profile.calltrace.cycles-pp.iomap_file_buffered_write.xfs_file_buffered_write.vfs_write.ksys_write.do_syscall_64
     33.36            +1.2       34.57        perf-profile.calltrace.cycles-pp.xfs_file_buffered_write.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe
     36.26            +1.3       37.56        perf-profile.calltrace.cycles-pp.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
     37.09            +1.4       38.44        perf-profile.calltrace.cycles-pp.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
     38.13            +1.4       39.53        perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
     38.38            +1.4       39.79        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.write
     41.30            +2.0       43.33        perf-profile.calltrace.cycles-pp.write
     15.88            -3.5       12.42 ±  3%  perf-profile.children.cycles-pp.rwsem_down_write_slowpath
     17.02            -3.4       13.59 ±  3%  perf-profile.children.cycles-pp.down_write
     14.58            -3.3       11.25 ±  4%  perf-profile.children.cycles-pp.rwsem_optimistic_spin
     12.90            -3.0        9.94 ±  4%  perf-profile.children.cycles-pp.osq_lock
      9.09            -1.8        7.26 ±  3%  perf-profile.children.cycles-pp.open_last_lookups
      9.24            -1.8        7.41 ±  3%  perf-profile.children.cycles-pp.path_openat
      9.20            -1.8        7.37 ±  3%  perf-profile.children.cycles-pp.__x64_sys_creat
      9.30            -1.8        7.47 ±  3%  perf-profile.children.cycles-pp.do_sys_openat2
      9.24            -1.8        7.41 ±  3%  perf-profile.children.cycles-pp.do_filp_open
      9.29            -1.8        7.48 ±  3%  perf-profile.children.cycles-pp.creat64
      8.80            -1.6        7.17 ±  3%  perf-profile.children.cycles-pp.__x64_sys_unlink
      8.80            -1.6        7.17 ±  3%  perf-profile.children.cycles-pp.do_unlinkat
      8.90            -1.6        7.28 ±  3%  perf-profile.children.cycles-pp.unlink
     83.14            -1.3       81.79        perf-profile.children.cycles-pp.do_syscall_64
     83.86            -1.3       82.52        perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
      2.29            -0.4        1.86        perf-profile.children.cycles-pp.rwsem_spin_on_owner
      1.06            -0.3        0.81        perf-profile.children.cycles-pp.pick_next_task_fair
      1.01            -0.2        0.77 ±  2%  perf-profile.children.cycles-pp.sched_balance_newidle
      1.28            -0.2        1.04        perf-profile.children.cycles-pp.schedule
      1.42            -0.2        1.19        perf-profile.children.cycles-pp.__schedule
      1.01            -0.2        0.80 ±  2%  perf-profile.children.cycles-pp.sched_balance_rq
      0.86            -0.2        0.68        perf-profile.children.cycles-pp.sched_balance_find_src_group
      0.85            -0.2        0.67        perf-profile.children.cycles-pp.update_sd_lb_stats
      0.79            -0.2        0.62        perf-profile.children.cycles-pp.update_sg_lb_stats
      1.78            -0.2        1.63 ±  4%  perf-profile.children.cycles-pp.kthread
      1.75            -0.2        1.60 ±  4%  perf-profile.children.cycles-pp.worker_thread
      1.78            -0.1        1.64 ±  4%  perf-profile.children.cycles-pp.ret_from_fork
      1.78            -0.1        1.64 ±  4%  perf-profile.children.cycles-pp.ret_from_fork_asm
      1.31            -0.1        1.23        perf-profile.children.cycles-pp.xfs_inactive
      1.33            -0.1        1.25        perf-profile.children.cycles-pp.xfs_inodegc_worker
      0.48            -0.1        0.41        perf-profile.children.cycles-pp.___down_common
      0.48            -0.1        0.41        perf-profile.children.cycles-pp.__down
      0.54            -0.1        0.47        perf-profile.children.cycles-pp.down
      0.48            -0.1        0.41        perf-profile.children.cycles-pp.schedule_timeout
      0.55            -0.1        0.48        perf-profile.children.cycles-pp.xfs_buf_lock
      0.96            -0.1        0.88        perf-profile.children.cycles-pp.xfs_inactive_ifree
      0.75            -0.1        0.68        perf-profile.children.cycles-pp.xfs_buf_get_map
      0.63            -0.1        0.56        perf-profile.children.cycles-pp.xfs_read_agi
      0.77            -0.1        0.70        perf-profile.children.cycles-pp.xfs_buf_read_map
      0.56            -0.1        0.50        perf-profile.children.cycles-pp.xfs_buf_find_lock
      0.67            -0.1        0.60        perf-profile.children.cycles-pp.xfs_buf_lookup
      0.91            -0.1        0.84        perf-profile.children.cycles-pp.xfs_trans_read_buf_map
      0.58            -0.1        0.52        perf-profile.children.cycles-pp.xfs_ialloc_read_agi
      0.69            -0.1        0.63        perf-profile.children.cycles-pp.xfs_ifree
      0.59            -0.1        0.53 ±  2%  perf-profile.children.cycles-pp.schedule_preempt_disabled
      0.65            -0.1        0.60        perf-profile.children.cycles-pp.xfs_difree
      0.68            -0.1        0.62        perf-profile.children.cycles-pp.xfs_inode_uninit
      0.27 ±  4%      -0.0        0.23 ±  3%  perf-profile.children.cycles-pp.task_tick_fair
      0.17 ±  2%      -0.0        0.15 ±  2%  perf-profile.children.cycles-pp.update_load_avg
      0.14 ±  3%      -0.0        0.11 ±  4%  perf-profile.children.cycles-pp.idle_cpu
      0.29            -0.0        0.27        perf-profile.children.cycles-pp.xfs_buf_item_format_segment
      0.08 ±  4%      -0.0        0.06 ±  7%  perf-profile.children.cycles-pp.cpu_util
      0.30 ±  2%      -0.0        0.28 ±  2%  perf-profile.children.cycles-pp.xfs_buf_item_format
      0.12 ±  5%      -0.0        0.10 ±  3%  perf-profile.children.cycles-pp.update_cfs_group
      0.09 ±  4%      -0.0        0.08 ±  6%  perf-profile.children.cycles-pp._find_next_and_bit
      0.18 ±  2%      -0.0        0.17 ±  2%  perf-profile.children.cycles-pp.xlog_copy_iovec
      0.06            +0.0        0.07        perf-profile.children.cycles-pp.kmem_cache_alloc_lru_noprof
      0.23            +0.0        0.24        perf-profile.children.cycles-pp.xfs_buffered_write_iomap_end
      0.29            +0.0        0.30        perf-profile.children.cycles-pp._raw_spin_lock
      0.05            +0.0        0.06 ±  7%  perf-profile.children.cycles-pp.native_sched_clock
      0.13 ±  3%      +0.0        0.15 ±  3%  perf-profile.children.cycles-pp.sched_ttwu_pending
      0.16 ±  2%      +0.0        0.18 ±  2%  perf-profile.children.cycles-pp.__flush_smp_call_function_queue
      0.08 ±  6%      +0.0        0.09 ±  4%  perf-profile.children.cycles-pp.__update_blocked_fair
      0.09            +0.0        0.11 ±  3%  perf-profile.children.cycles-pp.sched_balance_softirq
      0.07            +0.0        0.09 ±  4%  perf-profile.children.cycles-pp.update_rq_clock
      0.44            +0.0        0.46        perf-profile.children.cycles-pp.generic_write_checks
      0.23            +0.0        0.25 ±  2%  perf-profile.children.cycles-pp.try_to_wake_up
      0.48            +0.0        0.50        perf-profile.children.cycles-pp.xfs_iext_lookup_extent
      0.55            +0.0        0.57        perf-profile.children.cycles-pp.xfs_break_layouts
      0.10 ±  4%      +0.0        0.13 ±  2%  perf-profile.children.cycles-pp.kick_pool
      0.12 ±  3%      +0.0        0.14 ±  2%  perf-profile.children.cycles-pp.__queue_work
      0.14 ±  2%      +0.0        0.17 ±  2%  perf-profile.children.cycles-pp.run_timer_softirq
      0.14            +0.0        0.16 ±  3%  perf-profile.children.cycles-pp.__run_timers
      0.12 ±  4%      +0.0        0.15        perf-profile.children.cycles-pp.sched_balance_update_blocked_averages
      0.06 ±  8%      +0.0        0.08 ±  4%  perf-profile.children.cycles-pp.enqueue_dl_entity
      0.68            +0.0        0.71        perf-profile.children.cycles-pp.xas_store
      0.12 ±  3%      +0.0        0.15 ±  2%  perf-profile.children.cycles-pp.call_timer_fn
      1.05            +0.0        1.07        perf-profile.children.cycles-pp.__alloc_pages_noprof
      0.05            +0.0        0.08        perf-profile.children.cycles-pp.hrtimer_start_range_ns
      0.06 ±  8%      +0.0        0.09 ±  5%  perf-profile.children.cycles-pp.dl_server_start
      1.02            +0.0        1.05        perf-profile.children.cycles-pp.up_write
      0.17            +0.0        0.20        perf-profile.children.cycles-pp.ttwu_do_activate
      0.10 ±  3%      +0.0        0.13 ±  7%  perf-profile.children.cycles-pp.menu_select
      0.16 ±  3%      +0.0        0.19        perf-profile.children.cycles-pp.enqueue_task_fair
      1.20            +0.0        1.24        perf-profile.children.cycles-pp.__cond_resched
      1.21            +0.0        1.24        perf-profile.children.cycles-pp.alloc_pages_mpol_noprof
      1.27            +0.0        1.30        perf-profile.children.cycles-pp.folio_alloc_noprof
      1.10            +0.0        1.13        perf-profile.children.cycles-pp.fault_in_readable
      1.14            +0.0        1.18        perf-profile.children.cycles-pp.iomap_iter_advance
      0.10 ±  4%      +0.0        0.14 ±  2%  perf-profile.children.cycles-pp.sched_balance_domains
      1.06            +0.0        1.09        perf-profile.children.cycles-pp.filemap_get_entry
      1.22            +0.0        1.25        perf-profile.children.cycles-pp.xas_load
      0.82            +0.0        0.86        perf-profile.children.cycles-pp.xfs_ilock_for_iomap
      1.68            +0.0        1.72        perf-profile.children.cycles-pp.xfs_ilock
      1.24            +0.0        1.28        perf-profile.children.cycles-pp.fault_in_iov_iter_readable
      0.21 ±  2%      +0.0        0.26        perf-profile.children.cycles-pp.sysvec_call_function_single
      1.09            +0.0        1.14        perf-profile.children.cycles-pp.ksys_lseek
      0.88            +0.0        0.93        perf-profile.children.cycles-pp.__mod_memcg_lruvec_state
      1.67            +0.1        1.72        perf-profile.children.cycles-pp.copy_page_from_iter_atomic
      0.47            +0.1        0.52        perf-profile.children.cycles-pp.task_work_run
      0.46            +0.1        0.51        perf-profile.children.cycles-pp.task_mm_cid_work
      0.73            +0.1        0.78        perf-profile.children.cycles-pp.update_process_times
      2.04            +0.1        2.10        perf-profile.children.cycles-pp.filemap_get_read_batch
      0.06 ± 19%      +0.1        0.12 ± 14%  perf-profile.children.cycles-pp.tick_irq_enter
      2.22            +0.1        2.28        perf-profile.children.cycles-pp.xfs_file_write_checks
      1.86            +0.1        1.92        perf-profile.children.cycles-pp.xfs_iunlock
      0.07 ± 15%      +0.1        0.13 ± 12%  perf-profile.children.cycles-pp.irq_enter_rcu
      0.01 ±223%      +0.1        0.07        perf-profile.children.cycles-pp.start_dl_timer
      2.38            +0.1        2.44        perf-profile.children.cycles-pp.filemap_get_pages
      0.00            +0.1        0.07 ±  7%  perf-profile.children.cycles-pp.__hrtimer_start_range_ns
      1.30            +0.1        1.37        perf-profile.children.cycles-pp.syscall_exit_to_user_mode
      0.44            +0.1        0.51 ±  2%  perf-profile.children.cycles-pp.asm_sysvec_call_function_single
      0.81            +0.1        0.88 ±  2%  perf-profile.children.cycles-pp.tick_nohz_handler
      0.20 ± 11%      +0.1        0.28 ± 10%  perf-profile.children.cycles-pp.clockevents_program_event
      0.85            +0.1        0.93 ±  2%  perf-profile.children.cycles-pp.__hrtimer_run_queues
      1.75            +0.1        1.85 ±  3%  perf-profile.children.cycles-pp.fdget_pos
      0.60            +0.1        0.71        perf-profile.children.cycles-pp.handle_softirqs
      0.62            +0.1        0.74        perf-profile.children.cycles-pp.__irq_exit_rcu
      2.76            +0.1        2.88        perf-profile.children.cycles-pp.entry_SYSCALL_64
      0.24 ±  6%      +0.1        0.38 ±  9%  perf-profile.children.cycles-pp.ktime_get
      4.73            +0.1        4.88        perf-profile.children.cycles-pp.clear_bhb_loop
      4.22            +0.2        4.37        perf-profile.children.cycles-pp.xfs_buffered_write_iomap_begin
      6.80            +0.2        6.95        perf-profile.children.cycles-pp.__filemap_get_folio
      2.70            +0.2        2.87        perf-profile.children.cycles-pp.memset_orig
      2.79            +0.2        2.96        perf-profile.children.cycles-pp.zero_user_segments
      1.10 ±  2%      +0.2        1.29 ±  4%  perf-profile.children.cycles-pp.hrtimer_interrupt
      1.13 ±  2%      +0.2        1.32 ±  4%  perf-profile.children.cycles-pp.__sysvec_apic_timer_interrupt
      3.25            +0.2        3.44        perf-profile.children.cycles-pp.iomap_set_range_uptodate
      6.25            +0.2        6.46        perf-profile.children.cycles-pp.llseek
      4.74            +0.2        4.95        perf-profile.children.cycles-pp.iomap_write_end
      6.04            +0.2        6.25        perf-profile.children.cycles-pp.iomap_iter
      4.30            +0.2        4.55        perf-profile.children.cycles-pp.__iomap_write_begin
      4.90            +0.3        5.17        perf-profile.children.cycles-pp._copy_to_iter
      5.14            +0.3        5.42        perf-profile.children.cycles-pp.copy_page_to_iter
      1.84            +0.4        2.20 ±  2%  perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt
     14.77            +0.4       15.17        perf-profile.children.cycles-pp.filemap_read
     16.70            +0.4       17.13        perf-profile.children.cycles-pp.xfs_file_read_iter
     11.81            +0.4       12.25        perf-profile.children.cycles-pp.iomap_write_begin
     16.35            +0.4       16.79        perf-profile.children.cycles-pp.xfs_file_buffered_read
     18.46            +0.5       18.94        perf-profile.children.cycles-pp.vfs_read
     19.18            +0.5       19.70        perf-profile.children.cycles-pp.ksys_read
     24.52            +0.7       25.18        perf-profile.children.cycles-pp.read
      2.45            +0.7        3.16        perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt
     22.61            +0.9       23.46        perf-profile.children.cycles-pp.iomap_write_iter
      2.12            +0.9        3.01        perf-profile.children.cycles-pp.acpi_safe_halt
      2.12            +0.9        3.02        perf-profile.children.cycles-pp.acpi_idle_enter
      2.18            +0.9        3.11        perf-profile.children.cycles-pp.cpuidle_enter_state
      2.19            +0.9        3.12        perf-profile.children.cycles-pp.cpuidle_enter
      2.34            +1.0        3.33        perf-profile.children.cycles-pp.cpuidle_idle_call
      2.52            +1.0        3.51        perf-profile.children.cycles-pp.start_secondary
      2.54            +1.0        3.54        perf-profile.children.cycles-pp.common_startup_64
      2.54            +1.0        3.54        perf-profile.children.cycles-pp.cpu_startup_entry
      2.54            +1.0        3.54        perf-profile.children.cycles-pp.do_idle
     29.39            +1.1       30.49        perf-profile.children.cycles-pp.iomap_file_buffered_write
     33.47            +1.2       34.68        perf-profile.children.cycles-pp.xfs_file_buffered_write
     36.35            +1.3       37.66        perf-profile.children.cycles-pp.vfs_write
     37.18            +1.4       38.54        perf-profile.children.cycles-pp.ksys_write
     41.97            +1.5       43.49        perf-profile.children.cycles-pp.write
     12.68            -2.9        9.78 ±  4%  perf-profile.self.cycles-pp.osq_lock
      2.26            -0.4        1.83        perf-profile.self.cycles-pp.rwsem_spin_on_owner
      0.55            -0.1        0.44        perf-profile.self.cycles-pp.update_sg_lb_stats
      0.12 ±  4%      -0.0        0.11 ±  4%  perf-profile.self.cycles-pp.idle_cpu
      0.12 ±  5%      -0.0        0.10 ±  3%  perf-profile.self.cycles-pp.update_cfs_group
      0.10 ±  4%      -0.0        0.09 ±  4%  perf-profile.self.cycles-pp.update_load_avg
      0.22 ±  2%      +0.0        0.24 ±  2%  perf-profile.self.cycles-pp.cgroup_rstat_updated
      0.28            +0.0        0.30        perf-profile.self.cycles-pp.__folio_batch_add_and_move
      0.72            +0.0        0.74        perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
      0.42            +0.0        0.44        perf-profile.self.cycles-pp.folio_unlock
      0.05            +0.0        0.07 ±  5%  perf-profile.self.cycles-pp.update_rq_clock
      0.05 ±  7%      +0.0        0.07 ±  5%  perf-profile.self.cycles-pp.menu_select
      0.54            +0.0        0.56        perf-profile.self.cycles-pp.xfs_file_buffered_write
      0.46            +0.0        0.48        perf-profile.self.cycles-pp.xfs_iext_lookup_extent
      0.43 ±  2%      +0.0        0.45        perf-profile.self.cycles-pp.iomap_write_end
      1.00            +0.0        1.03        perf-profile.self.cycles-pp.do_syscall_64
      0.58            +0.0        0.61        perf-profile.self.cycles-pp.iomap_write_begin
      0.95            +0.0        0.98        perf-profile.self.cycles-pp.iomap_write_iter
      0.78            +0.0        0.81        perf-profile.self.cycles-pp.__filemap_get_folio
      0.81            +0.0        0.84        perf-profile.self.cycles-pp.up_write
      0.86            +0.0        0.88        perf-profile.self.cycles-pp.xas_load
      0.76            +0.0        0.79        perf-profile.self.cycles-pp.iomap_file_buffered_write
      0.99            +0.0        1.02        perf-profile.self.cycles-pp.vfs_read
      1.06            +0.0        1.09        perf-profile.self.cycles-pp.xfs_buffered_write_iomap_begin
      0.60            +0.0        0.64        perf-profile.self.cycles-pp.filemap_get_entry
      1.06            +0.0        1.10        perf-profile.self.cycles-pp.fault_in_readable
      0.65            +0.0        0.69        perf-profile.self.cycles-pp.__mod_memcg_lruvec_state
      0.88 ±  2%      +0.0        0.92        perf-profile.self.cycles-pp.entry_SYSCALL_64
      1.20            +0.0        1.24        perf-profile.self.cycles-pp.vfs_write
      1.10            +0.0        1.14        perf-profile.self.cycles-pp.iomap_iter_advance
      0.74            +0.0        0.78 ±  2%  perf-profile.self.cycles-pp.llseek
      0.42 ±  2%      +0.0        0.46        perf-profile.self.cycles-pp.task_mm_cid_work
      1.64            +0.1        1.69        perf-profile.self.cycles-pp.copy_page_from_iter_atomic
      0.00            +0.1        0.06 ±  9%  perf-profile.self.cycles-pp.sched_balance_domains
      1.62 ±  2%      +0.1        1.69        perf-profile.self.cycles-pp.filemap_read
      1.66            +0.1        1.75 ±  3%  perf-profile.self.cycles-pp.fdget_pos
      0.22 ±  6%      +0.1        0.35 ± 11%  perf-profile.self.cycles-pp.ktime_get
      4.68            +0.1        4.82        perf-profile.self.cycles-pp.clear_bhb_loop
      2.68            +0.2        2.84        perf-profile.self.cycles-pp.memset_orig
      3.20            +0.2        3.39        perf-profile.self.cycles-pp.iomap_set_range_uptodate
      4.84            +0.3        5.11        perf-profile.self.cycles-pp._copy_to_iter
      0.83            +0.4        1.23        perf-profile.self.cycles-pp.acpi_safe_halt



***************************************************************************************************
lkp-skl-fpga01: 104 threads 2 sockets (Skylake) with 192G memory
=========================================================================================
compiler/cpufreq_governor/kconfig/mode/nr_task/rootfs/tbox_group/test/testcase:
  gcc-12/performance/x86_64-rhel-8.3/thread/100%/debian-12-x86_64-20240206.cgz/lkp-skl-fpga01/sched_yield/will-it-scale

commit: 
  7266f0a6d3 ("fs/bcachefs: Fix __wait_on_freeing_inode() definition of waitqueue entry")
  538d813df3 ("sched/eevdf: Reduce the computation frequency of avg_vruntime")

7266f0a6d3bb73f4 538d813df3945cbc9d6a90ba224 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
 1.977e+08 ±  6%     +40.4%  2.776e+08 ± 15%  cpuidle..time
     44.46           -14.6%      37.96 ±  3%  vmstat.cpu.us
     54.34            +6.3       60.67 ±  2%  mpstat.cpu.all.sys%
     44.71            -6.5       38.16 ±  3%  mpstat.cpu.all.usr%
    309.97 ± 86%     -89.9%      31.19 ±211%  perf-sched.wait_time.avg.ms.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      1343 ± 93%     -86.3%     183.97 ±215%  perf-sched.wait_time.max.ms.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
  65774130            -4.1%   63075951        will-it-scale.104.threads
    632443            -4.1%     606499        will-it-scale.per_thread_ops
  65774130            -4.1%   63075951        will-it-scale.workload
 1.496e+10            -1.5%  1.474e+10        perf-stat.i.branch-instructions
      0.73            +0.1        0.87 ±  4%  perf-stat.i.branch-miss-rate%
 1.098e+08           +16.7%  1.281e+08 ±  4%  perf-stat.i.branch-misses
    178.36            -2.2%     174.43        perf-stat.i.cpu-migrations
      0.73            +0.1        0.87 ±  4%  perf-stat.overall.branch-miss-rate%
    340697            +4.5%     355878        perf-stat.overall.path-length
 1.492e+10            -1.5%  1.469e+10        perf-stat.ps.branch-instructions
 1.095e+08           +16.6%  1.277e+08 ±  4%  perf-stat.ps.branch-misses
    177.83            -2.2%     173.84        perf-stat.ps.cpu-migrations
     22.46            -1.7       20.74        perf-profile.calltrace.cycles-pp.syscall_return_via_sysret.__sched_yield
     15.97            -1.6       14.32 ±  2%  perf-profile.calltrace.cycles-pp.entry_SYSRETQ_unsafe_stack.__sched_yield
      2.12 ± 19%      -0.9        1.21 ± 36%  perf-profile.calltrace.cycles-pp.testcase
      1.12 ± 11%      -0.5        0.57 ± 45%  perf-profile.calltrace.cycles-pp.perf_trace_sched_stat_runtime.update_curr.pick_task_fair.pick_next_task_fair.__schedule
      3.43            -0.4        3.06        perf-profile.calltrace.cycles-pp.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
      7.89            -0.2        7.65        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64.__sched_yield
      0.99 ±  2%      -0.2        0.79 ±  3%  perf-profile.calltrace.cycles-pp.raw_spin_rq_lock_nested.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.90 ±  2%      -0.2        0.71 ±  3%  perf-profile.calltrace.cycles-pp._raw_spin_lock.raw_spin_rq_lock_nested.do_sched_yield.__x64_sys_sched_yield.do_syscall_64
      1.11 ±  2%      -0.1        1.03 ±  2%  perf-profile.calltrace.cycles-pp.yield_task_fair.do_sched_yield.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.58 ±  9%      -0.1        0.53        perf-profile.calltrace.cycles-pp.__calc_delta.update_curr.pick_task_fair.pick_next_task_fair.__schedule
      0.52 ±  2%      +0.2        0.71 ±  8%  perf-profile.calltrace.cycles-pp.update_curr_se.update_curr.pick_task_fair.pick_next_task_fair.__schedule
      0.90 ±  2%      +0.5        1.44 ± 13%  perf-profile.calltrace.cycles-pp.update_curr_dl_se.update_curr.pick_task_fair.pick_next_task_fair.__schedule
      0.00            +0.7        0.71 ± 16%  perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
      1.08            +0.7        1.80 ± 10%  perf-profile.calltrace.cycles-pp.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
      5.86 ±  3%      +0.8        6.62 ±  4%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_safe_stack.__sched_yield
     97.15            +1.0       98.11        perf-profile.calltrace.cycles-pp.__sched_yield
      1.58            +2.1        3.68        perf-profile.calltrace.cycles-pp.update_min_vruntime.update_curr.pick_task_fair.pick_next_task_fair.__schedule
     26.96            +3.5       30.49 ±  2%  perf-profile.calltrace.cycles-pp.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
     23.27            +4.0       27.22 ±  3%  perf-profile.calltrace.cycles-pp.schedule.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
     22.59            +4.0       26.62 ±  3%  perf-profile.calltrace.cycles-pp.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64.entry_SYSCALL_64_after_hwframe
     15.38 ±  2%      +4.8       20.13 ±  3%  perf-profile.calltrace.cycles-pp.pick_next_task_fair.__schedule.schedule.__x64_sys_sched_yield.do_syscall_64
     45.87            +4.8       50.67        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.__sched_yield
     33.18            +5.6       38.77 ±  2%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.__sched_yield
     13.14            +5.7       18.82 ±  3%  perf-profile.calltrace.cycles-pp.pick_task_fair.pick_next_task_fair.__schedule.schedule.__x64_sys_sched_yield
      9.46            +5.9       15.34 ±  3%  perf-profile.calltrace.cycles-pp.update_curr.pick_task_fair.pick_next_task_fair.__schedule.schedule
     22.62            -1.7       20.88        perf-profile.children.cycles-pp.syscall_return_via_sysret
     16.99            -1.7       15.30 ±  2%  perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack
      2.12 ± 19%      -0.9        1.21 ± 36%  perf-profile.children.cycles-pp.testcase
      1.12 ± 11%      -0.5        0.61 ± 32%  perf-profile.children.cycles-pp.perf_trace_sched_stat_runtime
      3.61            -0.4        3.21        perf-profile.children.cycles-pp.do_sched_yield
      1.59            -0.2        1.36 ±  3%  perf-profile.children.cycles-pp._raw_spin_lock
      1.02 ±  2%      -0.2        0.82 ±  3%  perf-profile.children.cycles-pp.raw_spin_rq_lock_nested
      1.12 ±  2%      -0.1        1.03 ±  2%  perf-profile.children.cycles-pp.yield_task_fair
      0.60 ±  9%      -0.1        0.54        perf-profile.children.cycles-pp.__calc_delta
      0.19 ±  3%      -0.0        0.14 ±  6%  perf-profile.children.cycles-pp.sched_update_worker
      0.07 ±  5%      +0.0        0.08        perf-profile.children.cycles-pp.task_tick_fair
      0.11 ±  3%      +0.0        0.13 ±  2%  perf-profile.children.cycles-pp.sched_tick
      0.24 ±  9%      +0.0        0.28 ±  3%  perf-profile.children.cycles-pp.update_process_times
      0.09 ±  4%      +0.0        0.13 ± 12%  perf-profile.children.cycles-pp.arch_scale_cpu_capacity
      0.28 ±  8%      +0.0        0.32 ±  3%  perf-profile.children.cycles-pp.tick_nohz_handler
      0.34 ±  7%      +0.0        0.38 ±  2%  perf-profile.children.cycles-pp.__hrtimer_run_queues
      0.40 ±  6%      +0.0        0.44 ±  2%  perf-profile.children.cycles-pp.__sysvec_apic_timer_interrupt
      0.46 ±  5%      +0.0        0.51 ±  2%  perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt
      0.39 ±  7%      +0.0        0.44 ±  2%  perf-profile.children.cycles-pp.hrtimer_interrupt
      0.05            +0.0        0.10 ±  4%  perf-profile.children.cycles-pp.sched_yield@plt
      0.42 ±  6%      +0.0        0.47 ±  2%  perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt
      1.11            +0.1        1.16 ±  3%  perf-profile.children.cycles-pp.update_rq_clock_task
      0.50 ±  2%      +0.1        0.64 ±  9%  perf-profile.children.cycles-pp.dl_scaled_delta_exec
      0.57            +0.2        0.74 ±  8%  perf-profile.children.cycles-pp.update_curr_se
      9.44            +0.2        9.63        perf-profile.children.cycles-pp.entry_SYSCALL_64
      3.20 ±  3%      +0.4        3.57 ±  3%  perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack
      0.38 ±  4%      +0.4        0.75 ± 16%  perf-profile.children.cycles-pp.syscall_exit_to_user_mode_prepare
      0.97 ±  2%      +0.5        1.50 ± 12%  perf-profile.children.cycles-pp.update_curr_dl_se
      1.13            +0.7        1.85 ± 10%  perf-profile.children.cycles-pp.syscall_exit_to_user_mode
     97.52            +0.9       98.46        perf-profile.children.cycles-pp.__sched_yield
      1.59            +2.1        3.70        perf-profile.children.cycles-pp.update_min_vruntime
     27.20            +3.5       30.72 ±  2%  perf-profile.children.cycles-pp.__x64_sys_sched_yield
     23.29            +3.9       27.24 ±  3%  perf-profile.children.cycles-pp.schedule
     22.77            +4.0       26.78 ±  3%  perf-profile.children.cycles-pp.__schedule
     15.44 ±  2%      +4.8       20.19 ±  3%  perf-profile.children.cycles-pp.pick_next_task_fair
     46.20            +4.8       50.97        perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
     13.50            +5.6       19.09 ±  3%  perf-profile.children.cycles-pp.pick_task_fair
     33.34            +5.6       38.94 ±  2%  perf-profile.children.cycles-pp.do_syscall_64
      9.89            +5.7       15.60 ±  3%  perf-profile.children.cycles-pp.update_curr
     22.55            -1.7       20.84        perf-profile.self.cycles-pp.syscall_return_via_sysret
     16.83            -1.7       15.15 ±  2%  perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack
      1.84 ± 21%      -0.9        0.97 ± 44%  perf-profile.self.cycles-pp.testcase
     13.04            -0.8       12.20        perf-profile.self.cycles-pp.entry_SYSCALL_64_after_hwframe
      1.88 ±  7%      -0.8        1.06 ±  8%  perf-profile.self.cycles-pp.pick_next_task_fair
      3.97 ±  2%      -0.8        3.20 ±  6%  perf-profile.self.cycles-pp.__schedule
      2.82 ±  8%      -0.6        2.18 ±  2%  perf-profile.self.cycles-pp.__sched_yield
      1.09 ± 11%      -0.5        0.59 ± 32%  perf-profile.self.cycles-pp.perf_trace_sched_stat_runtime
      1.54            -0.2        1.30 ±  4%  perf-profile.self.cycles-pp._raw_spin_lock
      1.20            -0.1        1.09 ±  2%  perf-profile.self.cycles-pp.do_sched_yield
      0.52 ±  2%      -0.1        0.45 ±  2%  perf-profile.self.cycles-pp.schedule
      1.00 ±  2%      -0.1        0.94 ±  2%  perf-profile.self.cycles-pp.yield_task_fair
      0.58 ±  8%      -0.1        0.53        perf-profile.self.cycles-pp.__calc_delta
      0.55            -0.0        0.52        perf-profile.self.cycles-pp.entry_SYSCALL_64_safe_stack
      0.17 ±  2%      -0.0        0.14 ±  4%  perf-profile.self.cycles-pp.sched_update_worker
      0.25 ±  2%      -0.0        0.23 ±  3%  perf-profile.self.cycles-pp.__x64_sys_sched_yield
      0.08 ±  5%      +0.0        0.10 ±  6%  perf-profile.self.cycles-pp.arch_scale_cpu_capacity
      0.05            +0.0        0.10 ±  5%  perf-profile.self.cycles-pp.sched_yield@plt
      0.44 ±  2%      +0.1        0.58 ±  9%  perf-profile.self.cycles-pp.dl_scaled_delta_exec
      0.50 ±  2%      +0.2        0.68 ±  8%  perf-profile.self.cycles-pp.update_curr_se
      8.34            +0.2        8.56        perf-profile.self.cycles-pp.entry_SYSCALL_64
      0.72            +0.4        1.08 ±  7%  perf-profile.self.cycles-pp.syscall_exit_to_user_mode
      0.34 ±  3%      +0.4        0.70 ± 16%  perf-profile.self.cycles-pp.syscall_exit_to_user_mode_prepare
      0.49 ±  2%      +0.4        0.88 ± 15%  perf-profile.self.cycles-pp.update_curr_dl_se
      4.30 ±  3%      +1.4        5.66 ±  5%  perf-profile.self.cycles-pp.do_syscall_64
      1.47 ±  2%      +2.2        3.68        perf-profile.self.cycles-pp.update_min_vruntime
      4.05            +3.4        7.47 ±  4%  perf-profile.self.cycles-pp.update_curr





Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
[PATCH v3] sched/eevdf: Remove the consideration of the current task's time in vruntime_eligible() and avg_vruntime()
Posted by Xavier 1 month, 1 week ago
The current code subtracts the value of curr from avg_vruntime and
avg_load during runtime. Then, every time avg_vruntime() and
vruntime_eligible() are called, the values of curr need to be added to
the avg_vruntime and avg_load.

There is no need to subtract curr’s load from avg_load during runtime.
Instead, we only need to calculate the incremental change and update
tot_vruntime whenever curr’s time is updated. This can effectively reduce
the number of calculations for curr’s time.

To better represent their functions, rename the original avg_vruntime and
avg_load to tot_vruntime and tot_load, respectively, which more accurately
describes their roles in the computation.

Signed-off-by: Xavier <xavier_qy@163.com>
---

Note:
To address the performance decline in V2, the V3 patch reduces the
unnecessary calculations of avg_vruntime.


 kernel/sched/fair.c  | 104 ++++++++++++++++++++++++-------------------
 kernel/sched/sched.h |   3 +-
 2 files changed, 59 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6512258dc71..cbcdf8686fe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Which we track using:
  *
  *                    v0 := cfs_rq->min_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- *              \Sum w_i := cfs_rq->avg_load
+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
+ *              \Sum w_i := cfs_rq->tot_load
  *
  * Since min_vruntime is a monotonic increasing variable that closely tracks
  * the per-task service, these deltas: (v_i - v), will be in the order of the
@@ -617,14 +617,28 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  * As measured, the max (key * weight) value was ~44 bits for a kernel build.
  */
+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
+{
+	s64	tot_vruntime = cfs_rq->tot_vruntime;
+
+	/* sign flips effective floor / ceiling */
+	if (cfs_rq->tot_load) {
+		if (tot_vruntime < 0)
+			tot_vruntime -= (cfs_rq->tot_load - 1);
+		cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
+	} else {
+		cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
+	}
+}
+
 static void
 avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	unsigned long weight = scale_load_down(se->load.weight);
 	s64 key = entity_key(cfs_rq, se);
 
-	cfs_rq->avg_vruntime += key * weight;
-	cfs_rq->avg_load += weight;
+	cfs_rq->tot_vruntime += key * weight;
+	cfs_rq->tot_load += weight;
 }
 
 static void
@@ -633,17 +647,26 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	unsigned long weight = scale_load_down(se->load.weight);
 	s64 key = entity_key(cfs_rq, se);
 
-	cfs_rq->avg_vruntime -= key * weight;
-	cfs_rq->avg_load -= weight;
+	cfs_rq->tot_vruntime -= key * weight;
+	cfs_rq->tot_load -= weight;
+}
+
+static inline
+void tot_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
+{
+	struct sched_entity *curr = cfs_rq->curr;
+	unsigned long weight = scale_load_down(curr->load.weight);
+
+	cfs_rq->tot_vruntime += delta * weight;
 }
 
 static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void tot_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
 {
 	/*
-	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+	 * v' = v + d ==> tot_vruntime' = tot_vruntime - d*tot_load
 	 */
-	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+	cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
 }
 
 /*
@@ -652,25 +675,9 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
  */
 u64 avg_vruntime(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *curr = cfs_rq->curr;
-	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
-
-	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
-
-		avg += entity_key(cfs_rq, curr) * weight;
-		load += weight;
-	}
-
-	if (load) {
-		/* sign flips effective floor / ceiling */
-		if (avg < 0)
-			avg -= (load - 1);
-		avg = div_s64(avg, load);
-	}
+	avg_vruntime_update(cfs_rq);
 
-	return cfs_rq->min_vruntime + avg;
+	return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
 }
 
 /*
@@ -725,18 +732,8 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
  */
 static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
 {
-	struct sched_entity *curr = cfs_rq->curr;
-	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
-
-	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
-
-		avg += entity_key(cfs_rq, curr) * weight;
-		load += weight;
-	}
-
-	return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+	return cfs_rq->tot_vruntime >=
+			(s64)(vruntime - cfs_rq->min_vruntime) * (s64)cfs_rq->tot_load;
 }
 
 int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -752,7 +749,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
 	 */
 	s64 delta = (s64)(vruntime - min_vruntime);
 	if (delta > 0) {
-		avg_vruntime_update(cfs_rq, delta);
+		tot_vruntime_update_for_minv(cfs_rq, delta);
 		min_vruntime = vruntime;
 	}
 	return min_vruntime;
@@ -851,7 +848,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
  */
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	avg_vruntime_add(cfs_rq, se);
 	se->min_vruntime = se->vruntime;
 	se->min_slice = se->slice;
 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -862,7 +858,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
 				  &min_vruntime_cb);
-	avg_vruntime_sub(cfs_rq, se);
 }
 
 struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1219,6 +1214,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	s64 delta_exec;
 	bool resched;
+	s64 vdelta_exec;
 
 	if (unlikely(!curr))
 		return;
@@ -1227,8 +1223,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (unlikely(delta_exec <= 0))
 		return;
 
-	curr->vruntime += calc_delta_fair(delta_exec, curr);
+	vdelta_exec = calc_delta_fair(delta_exec, curr);
+	curr->vruntime += vdelta_exec;
 	resched = update_deadline(cfs_rq, curr);
+
+	tot_vruntime_update_for_curr(cfs_rq, vdelta_exec);
 	update_min_vruntime(cfs_rq);
 
 	if (entity_is_task(curr)) {
@@ -3883,6 +3882,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		avruntime = avg_vruntime(cfs_rq);
 		if (!curr)
 			__dequeue_entity(cfs_rq, se);
+
+		avg_vruntime_sub(cfs_rq, se);
 		update_load_sub(&cfs_rq->load, se->load.weight);
 	}
 	dequeue_load_avg(cfs_rq, se);
@@ -3913,6 +3914,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		if (!curr)
 			__enqueue_entity(cfs_rq, se);
 
+		avg_vruntime_add(cfs_rq, se);
+
 		/*
 		 * The entity's vruntime has been adjusted, so let's check
 		 * whether the rq-wide min_vruntime needs updated too. Since
@@ -5281,7 +5284,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * EEVDF: placement strategy #1 / #2
 	 */
 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
-		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
 
 		lag = se->vlag;
@@ -5338,9 +5340,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		 *
 		 *   vl_i = (W + w_i)*vl'_i / W
 		 */
-		load = cfs_rq->avg_load;
-		if (curr && curr->on_rq)
-			load += scale_load_down(curr->load.weight);
+		load = cfs_rq->tot_load;
 
 		lag *= load + scale_load_down(se->load.weight);
 		if (WARN_ON_ONCE(!load))
@@ -5427,6 +5427,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_stats_enqueue_fair(cfs_rq, se, flags);
 	if (!curr)
 		__enqueue_entity(cfs_rq, se);
+
+	avg_vruntime_add(cfs_rq, se);
 	se->on_rq = 1;
 
 	if (cfs_rq->nr_running == 1) {
@@ -5530,6 +5532,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
+
+	avg_vruntime_sub(cfs_rq, se);
 	se->on_rq = 0;
 	account_entity_dequeue(cfs_rq, se);
 
@@ -6927,11 +6931,17 @@ requeue_delayed_entity(struct sched_entity *se)
 			cfs_rq->nr_running--;
 			if (se != cfs_rq->curr)
 				__dequeue_entity(cfs_rq, se);
+			avg_vruntime_sub(cfs_rq, se);
+
 			se->vlag = 0;
 			place_entity(cfs_rq, se, 0);
+
 			if (se != cfs_rq->curr)
 				__enqueue_entity(cfs_rq, se);
+			avg_vruntime_add(cfs_rq, se);
 			cfs_rq->nr_running++;
+
+			update_min_vruntime(cfs_rq);
 		}
 	}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5f3de82ec9c..b5b4062db3e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,8 +650,9 @@ struct cfs_rq {
 	unsigned int		idle_nr_running;   /* SCHED_IDLE */
 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
+	s64			tot_vruntime;
 	s64			avg_vruntime;
-	u64			avg_load;
+	u64			tot_load;
 
 	u64			min_vruntime;
 #ifdef CONFIG_SCHED_CORE
-- 
2.45.2

Re: [PATCH v3] sched/eevdf: Remove the consideration of the current task's time in vruntime_eligible() and avg_vruntime()
Posted by kernel test robot 1 month ago

Hello,

kernel test robot noticed a 3.5% regression of reaim.jobs_per_min on:


commit: e14321ce0d4a9c7a2894b119840a9d58ec4cd25d ("[PATCH v3] sched/eevdf: Remove the consideration of the current task's time in vruntime_eligible() and avg_vruntime()")
url: https://github.com/intel-lab-lkp/linux/commits/Xavier/sched-eevdf-Remove-the-consideration-of-the-current-task-s-time-in-vruntime_eligible-and-avg_vruntime/20241018-225521
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git af0c8b2bf67b25756f27644936e74fd9a6273bd2
patch link: https://lore.kernel.org/all/20241018145216.1021494-1-xavier_qy@163.com/
patch subject: [PATCH v3] sched/eevdf: Remove the consideration of the current task's time in vruntime_eligible() and avg_vruntime()

testcase: reaim
config: x86_64-rhel-8.3
compiler: gcc-12
test machine: 48 threads 2 sockets Intel(R) Xeon(R) CPU E5-2697 v2 @ 2.70GHz (Ivy Bridge-EP) with 64G memory
parameters:

	runtime: 300s
	nr_task: 100%
	test: custom
	cpufreq_governor: performance




If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202410251525.9f85854d-oliver.sang@intel.com


Details are as below:
-------------------------------------------------------------------------------------------------->


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20241025/202410251525.9f85854d-oliver.sang@intel.com

=========================================================================================
compiler/cpufreq_governor/kconfig/nr_task/rootfs/runtime/tbox_group/test/testcase:
  gcc-12/performance/x86_64-rhel-8.3/100%/debian-12-x86_64-20240206.cgz/300s/lkp-ivb-2ep2/custom/reaim

commit: 
  af0c8b2bf6 ("sched: Split scheduler and execution contexts")
  e14321ce0d ("sched/eevdf: Remove the consideration of the current task's time in vruntime_eligible() and avg_vruntime()")

af0c8b2bf67b2575 e14321ce0d4a9c7a2894b119840 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
      0.14            +0.0        0.19        mpstat.cpu.all.soft%
    111.00 ± 80%     -89.9%      11.17 ± 23%  mpstat.max_utilization.seconds
      5858 ±  4%      -8.9%       5335 ±  4%  numa-meminfo.node1.KernelStack
     25970 ± 53%     -40.2%      15518 ± 51%  numa-meminfo.node1.Mapped
      5859 ±  4%      -8.9%       5337 ±  4%  numa-vmstat.node1.nr_kernel_stack
      6643 ± 52%     -40.9%       3927 ± 50%  numa-vmstat.node1.nr_mapped
     22526            -9.4%      20413        vmstat.system.cs
     27118            -2.9%      26328        vmstat.system.in
    224657 ±  2%      -9.1%     204143        meminfo.Active
    224625 ±  2%      -9.1%     204111        meminfo.Active(anon)
    239022 ±  2%      -9.0%     217445        meminfo.Shmem
     56114 ±  2%      -9.0%      51064        proc-vmstat.nr_active_anon
     12421            -2.5%      12115        proc-vmstat.nr_mapped
     59755 ±  2%      -9.0%      54398        proc-vmstat.nr_shmem
     56114 ±  2%      -9.0%      51064        proc-vmstat.nr_zone_active_anon
  69086758            -1.1%   68296855        proc-vmstat.numa_hit
  69043741            -1.2%   68247974        proc-vmstat.numa_local
     91813            -7.1%      85252        proc-vmstat.pgactivate
  71568239            -1.1%   70771661        proc-vmstat.pgalloc_normal
  84550200            -1.0%   83664581        proc-vmstat.pgfault
  71455511            -1.1%   70664205        proc-vmstat.pgfree
     51.42            -1.0%      50.90        reaim.child_utime
    200350            -3.5%     193411        reaim.jobs_per_min
      4173            -3.5%       4029        reaim.jobs_per_min_child
    208595            -2.4%     203678        reaim.max_jobs_per_min
      1.37            +3.6%       1.42        reaim.parent_time
      1.54           +48.0%       2.28        reaim.std_dev_percent
      0.02 ±  2%     +46.6%       0.03        reaim.std_dev_time
    675635           -67.8%     217563        reaim.time.involuntary_context_switches
  83620119            -1.1%   82730368        reaim.time.minor_page_faults
      1789            -2.4%       1746        reaim.time.percent_of_cpu_this_job_got
    829.97            -2.1%     812.88        reaim.time.system_time
      4578            -2.1%       4481        reaim.time.user_time
   3394885            -2.5%    3311673        reaim.time.voluntary_context_switches
    427200            -1.1%     422400        reaim.workload
      1.73            -2.8%       1.68        perf-stat.i.MPKI
 3.963e+09            +8.3%  4.291e+09        perf-stat.i.branch-instructions
      2.89            +0.1        3.01        perf-stat.i.branch-miss-rate%
  92238582            +7.8%   99393054        perf-stat.i.branch-misses
 5.351e+08            +7.9%  5.772e+08        perf-stat.i.cache-references
     48004            +2.9%      49407        perf-stat.i.cpu-clock
 5.535e+10            +7.4%  5.946e+10        perf-stat.i.cpu-cycles
      4872            -1.6%       4794        perf-stat.i.cpu-migrations
      2440            -1.7%       2398        perf-stat.i.cycles-between-cache-misses
  2.87e+10            +8.3%  3.108e+10        perf-stat.i.instructions
      0.49            +1.3%       0.50        perf-stat.i.ipc
     21.46            +5.5%      22.63        perf-stat.i.major-faults
     11.61           -15.1%       9.86        perf-stat.i.metric.K/sec
    276192            +9.0%     301016        perf-stat.i.minor-faults
    276214            +9.0%     301038        perf-stat.i.page-faults
     48004            +2.9%      49407        perf-stat.i.task-clock
   5644916 ± 18%  +35162.3%  1.991e+09 ± 17%  sched_debug.cfs_rq:/.avg_vruntime.avg
  17097981 ± 48%  +15093.3%  2.598e+09 ± 16%  sched_debug.cfs_rq:/.avg_vruntime.max
   2702756 ± 10%  +45989.8%  1.246e+09 ± 28%  sched_debug.cfs_rq:/.avg_vruntime.min
   2873896 ± 37%   +9275.7%  2.694e+08 ± 22%  sched_debug.cfs_rq:/.avg_vruntime.stddev
     40604 ± 76%  +1.3e+05%   53761749 ± 49%  sched_debug.cfs_rq:/.left_deadline.avg
   1438807 ± 57%  +70833.0%  1.021e+09 ± 25%  sched_debug.cfs_rq:/.left_deadline.max
    224842 ± 61%  +90999.7%  2.048e+08 ± 25%  sched_debug.cfs_rq:/.left_deadline.stddev
     40603 ± 76%  +1.3e+05%   53761750 ± 49%  sched_debug.cfs_rq:/.left_vruntime.avg
   1438775 ± 57%  +70834.6%  1.021e+09 ± 25%  sched_debug.cfs_rq:/.left_vruntime.max
    224837 ± 61%  +91001.7%  2.048e+08 ± 25%  sched_debug.cfs_rq:/.left_vruntime.stddev
     30025 ± 19%     +54.8%      46483 ± 23%  sched_debug.cfs_rq:/.load.avg
     95346 ± 38%     +72.5%     164455 ± 16%  sched_debug.cfs_rq:/.load.stddev
   5644917 ± 18%  +35161.5%   1.99e+09 ± 17%  sched_debug.cfs_rq:/.min_vruntime.avg
  17097981 ± 48%  +15092.9%  2.598e+09 ± 16%  sched_debug.cfs_rq:/.min_vruntime.max
   2702756 ± 10%  +45988.6%  1.246e+09 ± 28%  sched_debug.cfs_rq:/.min_vruntime.min
   2873897 ± 37%   +9275.6%  2.694e+08 ± 22%  sched_debug.cfs_rq:/.min_vruntime.stddev
      0.25 ± 11%     +30.7%       0.33 ± 10%  sched_debug.cfs_rq:/.nr_running.stddev
     40603 ± 76%  +1.3e+05%   53761750 ± 49%  sched_debug.cfs_rq:/.right_vruntime.avg
   1438775 ± 57%  +70834.6%  1.021e+09 ± 25%  sched_debug.cfs_rq:/.right_vruntime.max
    224837 ± 61%  +91001.7%  2.048e+08 ± 25%  sched_debug.cfs_rq:/.right_vruntime.stddev
      0.00 ±223%  +21421.5%       0.23 ± 66%  sched_debug.cfs_rq:/.spread.avg
      0.05 ±223%  +20478.3%      10.64 ± 64%  sched_debug.cfs_rq:/.spread.max
      0.01 ±223%  +20485.4%       1.52 ± 64%  sched_debug.cfs_rq:/.spread.stddev
     93.49 ± 55%     -56.5%      40.71 ± 74%  sched_debug.cfs_rq:/.util_avg.min
      7.50 ± 10%      -2.8        4.65        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe
      7.48 ± 10%      -2.8        4.64        perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe
      8.97 ±  2%      -1.4        7.59        perf-profile.calltrace.cycles-pp.common_startup_64
      8.80 ±  2%      -1.4        7.42        perf-profile.calltrace.cycles-pp.cpu_startup_entry.start_secondary.common_startup_64
      8.80 ±  2%      -1.4        7.42        perf-profile.calltrace.cycles-pp.start_secondary.common_startup_64
      8.78 ±  2%      -1.4        7.42        perf-profile.calltrace.cycles-pp.do_idle.cpu_startup_entry.start_secondary.common_startup_64
      8.54 ±  2%      -1.3        7.22        perf-profile.calltrace.cycles-pp.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary.common_startup_64
      8.03 ±  2%      -1.2        6.86        perf-profile.calltrace.cycles-pp.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary
      7.96 ±  2%      -1.1        6.81        perf-profile.calltrace.cycles-pp.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry
      3.58 ±  4%      -0.7        2.88        perf-profile.calltrace.cycles-pp.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe
      3.55 ±  4%      -0.7        2.86        perf-profile.calltrace.cycles-pp.__x64_sys_exit_group.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe
      3.55 ±  4%      -0.7        2.86        perf-profile.calltrace.cycles-pp.do_group_exit.__x64_sys_exit_group.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe
      3.55 ±  4%      -0.7        2.86        perf-profile.calltrace.cycles-pp.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call.do_syscall_64
      6.16 ±  2%      -0.6        5.53        perf-profile.calltrace.cycles-pp.intel_idle.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle
      2.31 ±  7%      -0.6        1.69        perf-profile.calltrace.cycles-pp.asm_exc_page_fault
      2.23 ±  7%      -0.6        1.62        perf-profile.calltrace.cycles-pp.exc_page_fault.asm_exc_page_fault
      2.18 ±  7%      -0.6        1.59        perf-profile.calltrace.cycles-pp.do_user_addr_fault.exc_page_fault.asm_exc_page_fault
      2.84 ±  4%      -0.5        2.31 ±  2%  perf-profile.calltrace.cycles-pp.exit_mm.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call
      2.83 ±  4%      -0.5        2.31 ±  2%  perf-profile.calltrace.cycles-pp.mmput.exit_mm.do_exit.do_group_exit.__x64_sys_exit_group
      2.82 ±  4%      -0.5        2.30 ±  2%  perf-profile.calltrace.cycles-pp.exit_mmap.mmput.exit_mm.do_exit.do_group_exit
      1.87 ±  7%      -0.5        1.36        perf-profile.calltrace.cycles-pp.handle_mm_fault.do_user_addr_fault.exc_page_fault.asm_exc_page_fault
      1.63 ±  4%      -0.5        1.14 ±  2%  perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle
      1.73 ±  7%      -0.5        1.26        perf-profile.calltrace.cycles-pp.__handle_mm_fault.handle_mm_fault.do_user_addr_fault.exc_page_fault.asm_exc_page_fault
      1.54 ±  4%      -0.5        1.08 ±  2%  perf-profile.calltrace.cycles-pp.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call
      1.00 ±  4%      -0.3        0.70 ±  2%  perf-profile.calltrace.cycles-pp.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state.cpuidle_enter
      0.98 ±  4%      -0.3        0.68 ±  2%  perf-profile.calltrace.cycles-pp.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt.cpuidle_enter_state
      1.21 ±  3%      -0.3        0.96        perf-profile.calltrace.cycles-pp.setlocale
      1.24 ±  4%      -0.2        1.01        perf-profile.calltrace.cycles-pp.unmap_vmas.exit_mmap.mmput.exit_mm.do_exit
      0.78 ±  5%      -0.2        0.55 ±  2%  perf-profile.calltrace.cycles-pp.__hrtimer_run_queues.hrtimer_interrupt.__sysvec_apic_timer_interrupt.sysvec_apic_timer_interrupt.asm_sysvec_apic_timer_interrupt
      0.87 ±  7%      -0.2        0.65 ±  3%  perf-profile.calltrace.cycles-pp.do_fault.__handle_mm_fault.handle_mm_fault.do_user_addr_fault.exc_page_fault
      1.17 ±  4%      -0.2        0.95        perf-profile.calltrace.cycles-pp.unmap_page_range.unmap_vmas.exit_mmap.mmput.exit_mm
      1.14 ±  4%      -0.2        0.92        perf-profile.calltrace.cycles-pp.zap_pmd_range.unmap_page_range.unmap_vmas.exit_mmap.mmput
      1.11 ±  4%      -0.2        0.90        perf-profile.calltrace.cycles-pp.zap_pte_range.zap_pmd_range.unmap_page_range.unmap_vmas.exit_mmap
      0.77 ±  7%      -0.2        0.57 ±  3%  perf-profile.calltrace.cycles-pp.do_read_fault.do_fault.__handle_mm_fault.handle_mm_fault.do_user_addr_fault
      0.73 ±  7%      -0.2        0.54 ±  2%  perf-profile.calltrace.cycles-pp.filemap_map_pages.do_read_fault.do_fault.__handle_mm_fault.handle_mm_fault
      1.56            -0.2        1.37        perf-profile.calltrace.cycles-pp.__x64_sys_execve.do_syscall_64.entry_SYSCALL_64_after_hwframe.execve
      1.56            -0.2        1.37        perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.execve
      1.56            -0.2        1.37        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.execve
      1.56            -0.2        1.38        perf-profile.calltrace.cycles-pp.execve
      0.92 ±  3%      -0.2        0.74 ±  2%  perf-profile.calltrace.cycles-pp.zap_present_ptes.zap_pte_range.zap_pmd_range.unmap_page_range.unmap_vmas
      1.55            -0.2        1.37        perf-profile.calltrace.cycles-pp.do_execveat_common.__x64_sys_execve.do_syscall_64.entry_SYSCALL_64_after_hwframe.execve
      1.11            -0.1        0.98        perf-profile.calltrace.cycles-pp.bprm_execve.do_execveat_common.__x64_sys_execve.do_syscall_64.entry_SYSCALL_64_after_hwframe
      1.00 ±  2%      -0.1        0.89        perf-profile.calltrace.cycles-pp.search_binary_handler.exec_binprm.bprm_execve.do_execveat_common.__x64_sys_execve
      1.00 ±  2%      -0.1        0.89        perf-profile.calltrace.cycles-pp.exec_binprm.bprm_execve.do_execveat_common.__x64_sys_execve.do_syscall_64
      0.97 ±  2%      -0.1        0.86        perf-profile.calltrace.cycles-pp.load_elf_binary.search_binary_handler.exec_binprm.bprm_execve.do_execveat_common
      0.63 ±  6%      -0.1        0.53 ±  3%  perf-profile.calltrace.cycles-pp.free_pgtables.exit_mmap.mmput.exit_mm.do_exit
      0.80 ±  4%      +0.0        0.85 ±  2%  perf-profile.calltrace.cycles-pp.brk
      1.92 ±  2%      +0.1        2.00        perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe._Fork
      1.92 ±  2%      +0.1        2.00        perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe._Fork
      1.91 ±  2%      +0.1        2.00        perf-profile.calltrace.cycles-pp.__do_sys_clone.do_syscall_64.entry_SYSCALL_64_after_hwframe._Fork
      1.91 ±  2%      +0.1        2.00        perf-profile.calltrace.cycles-pp.kernel_clone.__do_sys_clone.do_syscall_64.entry_SYSCALL_64_after_hwframe._Fork
      2.18 ±  2%      +0.1        2.28        perf-profile.calltrace.cycles-pp._Fork
      0.00            +0.5        0.52 ±  2%  perf-profile.calltrace.cycles-pp.asm_sysvec_apic_timer_interrupt.sieve.runloop.multiuser.main
      0.47 ± 79%      +0.7        1.15 ± 14%  perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.scan_positives.dcache_readdir.iterate_dir
      0.73 ± 56%      +0.7        1.43 ± 12%  perf-profile.calltrace.cycles-pp.dcache_readdir.iterate_dir.__x64_sys_getdents64.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.73 ± 56%      +0.7        1.43 ± 12%  perf-profile.calltrace.cycles-pp.__x64_sys_getdents64.do_syscall_64.entry_SYSCALL_64_after_hwframe.getdents64
      0.73 ± 56%      +0.7        1.43 ± 12%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.getdents64
      0.73 ± 56%      +0.7        1.43 ± 12%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.getdents64
      0.73 ± 56%      +0.7        1.43 ± 12%  perf-profile.calltrace.cycles-pp.iterate_dir.__x64_sys_getdents64.do_syscall_64.entry_SYSCALL_64_after_hwframe.getdents64
      0.73 ± 56%      +0.7        1.44 ± 12%  perf-profile.calltrace.cycles-pp.getdents64
      0.55 ± 78%      +0.7        1.27 ± 13%  perf-profile.calltrace.cycles-pp._raw_spin_lock.scan_positives.dcache_readdir.iterate_dir.__x64_sys_getdents64
      0.58 ± 78%      +0.8        1.34 ± 13%  perf-profile.calltrace.cycles-pp.scan_positives.dcache_readdir.iterate_dir.__x64_sys_getdents64.do_syscall_64
     66.34            +5.5       71.89        perf-profile.calltrace.cycles-pp.sieve.runloop.multiuser.main
     69.74            +5.8       75.52        perf-profile.calltrace.cycles-pp.main
     69.74            +5.8       75.52        perf-profile.calltrace.cycles-pp.multiuser.main
     69.73            +5.8       75.51        perf-profile.calltrace.cycles-pp.runloop.multiuser.main
     16.72 ±  4%      -2.5       14.23        perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
     16.67 ±  4%      -2.5       14.18        perf-profile.children.cycles-pp.do_syscall_64
      8.97 ±  2%      -1.4        7.59        perf-profile.children.cycles-pp.common_startup_64
      8.97 ±  2%      -1.4        7.59        perf-profile.children.cycles-pp.cpu_startup_entry
      8.97 ±  2%      -1.4        7.59        perf-profile.children.cycles-pp.do_idle
      8.80 ±  2%      -1.4        7.42        perf-profile.children.cycles-pp.start_secondary
      8.72 ±  2%      -1.3        7.38        perf-profile.children.cycles-pp.cpuidle_idle_call
      8.20 ±  2%      -1.2        7.02        perf-profile.children.cycles-pp.cpuidle_enter
      8.18 ±  2%      -1.2        7.00        perf-profile.children.cycles-pp.cpuidle_enter_state
      3.73 ±  4%      -0.7        3.02        perf-profile.children.cycles-pp.x64_sys_call
      3.64 ±  4%      -0.7        2.95        perf-profile.children.cycles-pp.__x64_sys_exit_group
      3.64 ±  4%      -0.7        2.95        perf-profile.children.cycles-pp.do_exit
      3.64 ±  4%      -0.7        2.95        perf-profile.children.cycles-pp.do_group_exit
      4.03 ±  4%      -0.7        3.35        perf-profile.children.cycles-pp.asm_exc_page_fault
      3.77 ±  4%      -0.6        3.12        perf-profile.children.cycles-pp.exc_page_fault
      2.60            -0.6        1.96        perf-profile.children.cycles-pp.asm_sysvec_apic_timer_interrupt
      6.29 ±  2%      -0.6        5.66 ±  2%  perf-profile.children.cycles-pp.intel_idle
      3.70 ±  4%      -0.6        3.07        perf-profile.children.cycles-pp.do_user_addr_fault
      2.44            -0.6        1.85        perf-profile.children.cycles-pp.sysvec_apic_timer_interrupt
      3.25 ±  4%      -0.6        2.68        perf-profile.children.cycles-pp.handle_mm_fault
      2.86 ±  4%      -0.5        2.32 ±  2%  perf-profile.children.cycles-pp.exit_mm
      2.84 ±  4%      -0.5        2.31 ±  2%  perf-profile.children.cycles-pp.mmput
      3.00 ±  4%      -0.5        2.48        perf-profile.children.cycles-pp.__handle_mm_fault
      2.83 ±  4%      -0.5        2.30 ±  2%  perf-profile.children.cycles-pp.exit_mmap
      2.88 ±  5%      -0.5        2.42        perf-profile.children.cycles-pp.kernel_clone
      1.81            -0.4        1.38        perf-profile.children.cycles-pp.__x64_sys_execve
      1.61            -0.4        1.18        perf-profile.children.cycles-pp.__sysvec_apic_timer_interrupt
      1.81 ±  2%      -0.4        1.38        perf-profile.children.cycles-pp.do_execveat_common
      1.58            -0.4        1.16        perf-profile.children.cycles-pp.hrtimer_interrupt
      2.37 ±  6%      -0.4        2.00        perf-profile.children.cycles-pp.__do_sys_clone
      2.38 ±  6%      -0.4        2.02        perf-profile.children.cycles-pp.copy_process
      1.58 ±  3%      -0.3        1.24 ±  2%  perf-profile.children.cycles-pp.do_sys_openat2
      1.32            -0.3        0.98        perf-profile.children.cycles-pp.__hrtimer_run_queues
      1.56 ±  2%      -0.3        1.23 ±  2%  perf-profile.children.cycles-pp.vm_mmap_pgoff
      1.48 ±  3%      -0.3        1.16 ±  2%  perf-profile.children.cycles-pp.do_filp_open
      1.48 ±  2%      -0.3        1.17 ±  2%  perf-profile.children.cycles-pp.do_mmap
      1.44 ±  3%      -0.3        1.14 ±  2%  perf-profile.children.cycles-pp.path_openat
      1.30 ±  2%      -0.3        1.00        perf-profile.children.cycles-pp.bprm_execve
      1.78 ±  7%      -0.3        1.49        perf-profile.children.cycles-pp.dup_mm
      1.32 ±  2%      -0.3        1.04        perf-profile.children.cycles-pp.mmap_region
      1.40 ±  5%      -0.3        1.14 ±  2%  perf-profile.children.cycles-pp.do_fault
      1.16 ±  2%      -0.3        0.89        perf-profile.children.cycles-pp.search_binary_handler
      1.17 ±  4%      -0.3        0.91 ±  2%  perf-profile.children.cycles-pp.__schedule
      1.16 ±  2%      -0.3        0.90        perf-profile.children.cycles-pp.exec_binprm
      1.22 ±  3%      -0.3        0.96        perf-profile.children.cycles-pp.setlocale
      1.12 ±  2%      -0.3        0.86        perf-profile.children.cycles-pp.load_elf_binary
      1.55 ±  7%      -0.3        1.30        perf-profile.children.cycles-pp.dup_mmap
      1.35 ±  4%      -0.2        1.10 ±  2%  perf-profile.children.cycles-pp.unmap_vmas
      1.05 ±  5%      -0.2        0.81 ±  3%  perf-profile.children.cycles-pp.__x64_sys_openat
      1.27 ±  4%      -0.2        1.04 ±  2%  perf-profile.children.cycles-pp.unmap_page_range
      1.22 ±  4%      -0.2        1.00 ±  2%  perf-profile.children.cycles-pp.zap_pmd_range
      1.22 ±  5%      -0.2        1.00 ±  2%  perf-profile.children.cycles-pp.do_read_fault
      1.19 ±  4%      -0.2        0.97 ±  2%  perf-profile.children.cycles-pp.zap_pte_range
      0.92 ±  2%      -0.2        0.71 ±  2%  perf-profile.children.cycles-pp.handle_softirqs
      1.17 ±  5%      -0.2        0.96        perf-profile.children.cycles-pp.filemap_map_pages
      1.01 ±  2%      -0.2        0.79 ±  2%  perf-profile.children.cycles-pp.ksys_mmap_pgoff
      0.90 ±  2%      -0.2        0.69 ±  2%  perf-profile.children.cycles-pp.tick_nohz_handler
      0.64 ±  5%      -0.2        0.44 ±  2%  perf-profile.children.cycles-pp.ret_from_fork_asm
      0.58 ±  6%      -0.2        0.38 ±  2%  perf-profile.children.cycles-pp.ret_from_fork
      0.89 ±  2%      -0.2        0.69 ±  2%  perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack
      0.99 ±  3%      -0.2        0.80 ±  2%  perf-profile.children.cycles-pp.zap_present_ptes
      0.35 ±  5%      -0.2        0.16 ±  3%  perf-profile.children.cycles-pp.smpboot_thread_fn
      0.42 ±  6%      -0.2        0.24 ±  3%  perf-profile.children.cycles-pp.kthread
      1.56            -0.2        1.38        perf-profile.children.cycles-pp.execve
      0.80 ±  2%      -0.2        0.61 ±  2%  perf-profile.children.cycles-pp.update_process_times
      0.80 ±  3%      -0.2        0.63 ±  3%  perf-profile.children.cycles-pp.schedule
      0.27 ±  5%      -0.2        0.12 ±  4%  perf-profile.children.cycles-pp.run_ksoftirqd
      0.74 ±  6%      -0.2        0.59 ±  3%  perf-profile.children.cycles-pp.__alloc_pages_noprof
      0.79 ±  6%      -0.2        0.64 ±  2%  perf-profile.children.cycles-pp.alloc_pages_mpol_noprof
      0.70 ±  3%      -0.2        0.54 ±  4%  perf-profile.children.cycles-pp.link_path_walk
      0.86 ±  5%      -0.1        0.71 ±  2%  perf-profile.children.cycles-pp.kmem_cache_free
      0.62 ±  6%      -0.1        0.47        perf-profile.children.cycles-pp.copy_page_range
      0.86 ±  3%      -0.1        0.72        perf-profile.children.cycles-pp.kmem_cache_alloc_noprof
      0.62 ±  2%      -0.1        0.48 ±  2%  perf-profile.children.cycles-pp.elf_load
      0.60 ±  6%      -0.1        0.46        perf-profile.children.cycles-pp.copy_p4d_range
      0.73 ±  4%      -0.1        0.59 ±  3%  perf-profile.children.cycles-pp.tlb_finish_mmu
      0.67 ±  5%      -0.1        0.54 ±  3%  perf-profile.children.cycles-pp.__irqentry_text_end
      0.58 ±  4%      -0.1        0.45 ±  4%  perf-profile.children.cycles-pp.pick_next_task_fair
      0.60 ±  6%      -0.1        0.47 ±  3%  perf-profile.children.cycles-pp.__open64_nocancel
      0.41 ±  5%      -0.1        0.28 ±  5%  perf-profile.children.cycles-pp.menu_select
      0.58 ±  6%      -0.1        0.45 ±  3%  perf-profile.children.cycles-pp.get_page_from_freelist
      0.78 ±  5%      -0.1        0.66 ±  2%  perf-profile.children.cycles-pp.__do_sys_brk
      0.62 ±  3%      -0.1        0.49 ±  2%  perf-profile.children.cycles-pp.__do_sys_wait4
      0.60 ±  3%      -0.1        0.48 ±  2%  perf-profile.children.cycles-pp.do_wait
      0.61 ±  3%      -0.1        0.49 ±  2%  perf-profile.children.cycles-pp.kernel_wait4
      0.46 ±  7%      -0.1        0.34 ±  2%  perf-profile.children.cycles-pp.copy_pte_range
      0.55 ±  4%      -0.1        0.43 ±  3%  perf-profile.children.cycles-pp.sched_balance_rq
      0.69 ±  5%      -0.1        0.58 ±  3%  perf-profile.children.cycles-pp.free_pgtables
      0.52 ±  3%      -0.1        0.41 ±  3%  perf-profile.children.cycles-pp.rcu_core
      0.61 ±  3%      -0.1        0.50 ±  4%  perf-profile.children.cycles-pp.__tlb_batch_free_encoded_pages
      0.61 ±  3%      -0.1        0.50 ±  4%  perf-profile.children.cycles-pp.free_pages_and_swap_cache
      0.59 ±  5%      -0.1        0.48        perf-profile.children.cycles-pp.__memcg_slab_post_alloc_hook
      0.49 ±  4%      -0.1        0.38 ±  4%  perf-profile.children.cycles-pp.walk_component
      0.62 ±  5%      -0.1        0.51 ±  4%  perf-profile.children.cycles-pp.do_anonymous_page
      0.41 ±  3%      -0.1        0.31 ±  2%  perf-profile.children.cycles-pp.__split_vma
      0.54 ±  4%      -0.1        0.44 ±  3%  perf-profile.children.cycles-pp.perf_event_mmap
      0.46 ±  3%      -0.1        0.36 ±  2%  perf-profile.children.cycles-pp.sched_tick
      0.54 ±  2%      -0.1        0.44        perf-profile.children.cycles-pp.__mmap
      0.54 ±  2%      -0.1        0.44        perf-profile.children.cycles-pp.__x64_sys_creat
      0.55 ±  6%      -0.1        0.45 ±  2%  perf-profile.children.cycles-pp.next_uptodate_folio
      0.50 ±  4%      -0.1        0.41 ±  4%  perf-profile.children.cycles-pp.sched_balance_newidle
      0.61 ±  4%      -0.1        0.52 ±  3%  perf-profile.children.cycles-pp.__irq_exit_rcu
      0.52 ±  5%      -0.1        0.42 ±  5%  perf-profile.children.cycles-pp.perf_event_mmap_event
      0.42 ±  4%      -0.1        0.33 ±  2%  perf-profile.children.cycles-pp.syscall_return_via_sysret
      0.63 ±  3%      -0.1        0.54 ±  4%  perf-profile.children.cycles-pp.ksys_write
      0.45 ±  3%      -0.1        0.36 ±  3%  perf-profile.children.cycles-pp.rcu_do_batch
      0.50 ±  3%      -0.1        0.41 ±  4%  perf-profile.children.cycles-pp.do_vmi_align_munmap
      0.38 ±  6%      -0.1        0.30 ±  3%  perf-profile.children.cycles-pp.clear_page_erms
      0.61 ±  4%      -0.1        0.52 ±  5%  perf-profile.children.cycles-pp.vfs_write
      0.43 ±  4%      -0.1        0.35 ±  4%  perf-profile.children.cycles-pp.entry_SYSCALL_64
      0.44 ±  4%      -0.1        0.36 ±  3%  perf-profile.children.cycles-pp.folios_put_refs
      0.28 ±  6%      -0.1        0.20 ±  3%  perf-profile.children.cycles-pp.copy_present_ptes
      0.35 ±  4%      -0.1        0.27 ±  3%  perf-profile.children.cycles-pp.syscall_exit_to_user_mode
      0.41 ±  6%      -0.1        0.33 ±  4%  perf-profile.children.cycles-pp.__memcg_slab_free_hook
      0.37 ±  4%      -0.1        0.30 ±  3%  perf-profile.children.cycles-pp.mas_store_prealloc
      0.30 ±  4%      -0.1        0.23 ±  3%  perf-profile.children.cycles-pp.load_elf_interp
      0.38 ±  2%      -0.1        0.30 ±  5%  perf-profile.children.cycles-pp.ksys_read
      0.41 ±  4%      -0.1        0.34 ±  5%  perf-profile.children.cycles-pp.perf_iterate_sb
      0.37            -0.1        0.30 ±  5%  perf-profile.children.cycles-pp.vfs_read
      0.55 ±  3%      -0.1        0.48        perf-profile.children.cycles-pp.wp_page_copy
      0.44 ±  6%      -0.1        0.37 ±  3%  perf-profile.children.cycles-pp.vma_alloc_folio_noprof
      0.22 ±  9%      -0.1        0.15 ±  8%  perf-profile.children.cycles-pp.tick_nohz_get_sleep_length
      0.30 ±  4%      -0.1        0.23        perf-profile.children.cycles-pp.vms_gather_munmap_vmas
      0.43 ±  6%      -0.1        0.36 ±  3%  perf-profile.children.cycles-pp.do_brk_flags
      0.27 ± 12%      -0.1        0.20 ±  5%  perf-profile.children.cycles-pp.d_alloc_parallel
      0.35 ±  3%      -0.1        0.28 ±  5%  perf-profile.children.cycles-pp.sched_balance_find_src_group
      0.34 ±  7%      -0.1        0.28 ±  3%  perf-profile.children.cycles-pp.mm_init
      0.33 ±  3%      -0.1        0.27 ±  6%  perf-profile.children.cycles-pp.update_sd_lb_stats
      0.28 ±  5%      -0.1        0.22 ±  4%  perf-profile.children.cycles-pp.__lookup_slow
      0.33 ±  4%      -0.1        0.27 ±  4%  perf-profile.children.cycles-pp.wake_up_new_task
      0.28 ±  6%      -0.1        0.21 ±  4%  perf-profile.children.cycles-pp.__x64_sys_mprotect
      0.35 ±  3%      -0.1        0.29 ±  3%  perf-profile.children.cycles-pp.__do_sys_newfstatat
      0.34 ±  3%      -0.1        0.27        perf-profile.children.cycles-pp.select_task_rq_fair
      0.38 ±  7%      -0.1        0.31 ±  4%  perf-profile.children.cycles-pp.alloc_anon_folio
      0.41 ±  7%      -0.1        0.34 ±  4%  perf-profile.children.cycles-pp.folio_alloc_mpol_noprof
      0.26 ±  3%      -0.1        0.20 ±  4%  perf-profile.children.cycles-pp.__do_sys_clone3
      0.27 ±  5%      -0.1        0.21 ±  4%  perf-profile.children.cycles-pp.do_mprotect_pkey
      0.40 ±  4%      -0.1        0.34 ±  3%  perf-profile.children.cycles-pp.vms_clear_ptes
      0.30 ±  7%      -0.1        0.24 ±  4%  perf-profile.children.cycles-pp.finish_task_switch
      0.23 ± 10%      -0.1        0.17 ±  5%  perf-profile.children.cycles-pp.open_last_lookups
      0.38 ±  3%      -0.1        0.32 ±  5%  perf-profile.children.cycles-pp.vms_complete_munmap_vmas
      0.16 ± 11%      -0.1        0.10 ±  9%  perf-profile.children.cycles-pp.tick_nohz_next_event
      0.30 ±  4%      -0.1        0.24 ±  4%  perf-profile.children.cycles-pp.folio_remove_rmap_ptes
      0.38 ±  6%      -0.1        0.33 ±  5%  perf-profile.children.cycles-pp.unlink_anon_vmas
      0.23 ±  6%      -0.1        0.18 ±  6%  perf-profile.children.cycles-pp.dequeue_entities
      0.23 ±  5%      -0.1        0.18 ±  3%  perf-profile.children.cycles-pp.mprotect_fixup
      0.29 ±  4%      -0.1        0.24 ±  2%  perf-profile.children.cycles-pp.filename_lookup
      0.21 ±  6%      -0.1        0.16 ±  3%  perf-profile.children.cycles-pp.__cond_resched
      0.26 ±  4%      -0.1        0.21 ±  4%  perf-profile.children.cycles-pp.__vm_munmap
      0.29 ±  3%      -0.1        0.24 ±  3%  perf-profile.children.cycles-pp.do_open
      0.22 ±  6%      -0.1        0.16 ±  5%  perf-profile.children.cycles-pp.do_task_dead
      0.26 ±  5%      -0.1        0.21 ±  2%  perf-profile.children.cycles-pp.native_irq_return_iret
      0.20 ± 12%      -0.1        0.14 ±  8%  perf-profile.children.cycles-pp.perf_mux_hrtimer_handler
      0.25 ±  7%      -0.1        0.20 ±  7%  perf-profile.children.cycles-pp.alloc_empty_file
      0.24 ±  5%      -0.1        0.20 ±  5%  perf-profile.children.cycles-pp.enqueue_task_fair
      0.21 ±  7%      -0.1        0.16 ±  6%  perf-profile.children.cycles-pp.update_load_avg
      0.28 ±  9%      -0.0        0.23 ±  3%  perf-profile.children.cycles-pp.mod_objcg_state
      0.27 ±  4%      -0.0        0.22 ±  6%  perf-profile.children.cycles-pp.update_sg_lb_stats
      0.21 ±  2%      -0.0        0.16 ±  4%  perf-profile.children.cycles-pp.alloc_bprm
      0.26 ±  2%      -0.0        0.21 ±  3%  perf-profile.children.cycles-pp.sched_balance_find_dst_group
      0.28 ±  5%      -0.0        0.24 ±  3%  perf-profile.children.cycles-pp.vfs_fstatat
      0.27 ±  4%      -0.0        0.23 ±  2%  perf-profile.children.cycles-pp.path_lookupat
      0.28 ±  5%      -0.0        0.23        perf-profile.children.cycles-pp.vm_area_dup
      0.19 ±  3%      -0.0        0.14 ±  3%  perf-profile.children.cycles-pp.copy_strings
      0.24 ±  5%      -0.0        0.19 ±  6%  perf-profile.children.cycles-pp.dequeue_task_fair
      0.23 ±  3%      -0.0        0.19 ±  2%  perf-profile.children.cycles-pp.update_sg_wakeup_stats
      0.22 ±  5%      -0.0        0.17 ±  2%  perf-profile.children.cycles-pp.d_alloc
      0.25 ±  3%      -0.0        0.21 ±  5%  perf-profile.children.cycles-pp.do_vmi_munmap
      0.15 ± 17%      -0.0        0.11 ±  8%  perf-profile.children.cycles-pp.lookup_open
      0.25 ±  2%      -0.0        0.21 ±  6%  perf-profile.children.cycles-pp.perf_event_mmap_output
      0.20 ±  7%      -0.0        0.16 ±  2%  perf-profile.children.cycles-pp.set_pte_range
      0.31 ±  5%      -0.0        0.26 ±  4%  perf-profile.children.cycles-pp.down_write
      0.20 ± 10%      -0.0        0.16 ±  4%  perf-profile.children.cycles-pp.mas_walk
      0.22 ±  4%      -0.0        0.18 ±  5%  perf-profile.children.cycles-pp.memset_orig
      0.23 ±  5%      -0.0        0.19 ±  3%  perf-profile.children.cycles-pp.pcpu_alloc_noprof
      0.17 ±  4%      -0.0        0.13 ±  5%  perf-profile.children.cycles-pp.vma_complete
      0.16 ±  3%      -0.0        0.11 ±  4%  perf-profile.children.cycles-pp.get_arg_page
      0.20 ±  8%      -0.0        0.16 ±  4%  perf-profile.children.cycles-pp.__mmdrop
      0.22 ±  8%      -0.0        0.18 ±  4%  perf-profile.children.cycles-pp.__mod_memcg_lruvec_state
      0.22 ±  6%      -0.0        0.18 ±  6%  perf-profile.children.cycles-pp.enqueue_task
      0.24 ±  5%      -0.0        0.20 ±  3%  perf-profile.children.cycles-pp.lock_vma_under_rcu
      0.26 ±  8%      -0.0        0.22 ±  4%  perf-profile.children.cycles-pp.___perf_sw_event
      0.20 ±  4%      -0.0        0.16 ±  2%  perf-profile.children.cycles-pp.error_entry
      0.18 ±  4%      -0.0        0.14 ±  4%  perf-profile.children.cycles-pp.mas_wr_node_store
      0.17 ± 12%      -0.0        0.13 ±  2%  perf-profile.children.cycles-pp.pte_alloc_one
      0.25 ±  4%      -0.0        0.21 ±  2%  perf-profile.children.cycles-pp.vfs_statx
      0.29 ±  4%      -0.0        0.25 ±  3%  perf-profile.children.cycles-pp.__vfork
      0.20 ±  4%      -0.0        0.16 ±  4%  perf-profile.children.cycles-pp.lookup_fast
      0.17 ±  6%      -0.0        0.13 ±  5%  perf-profile.children.cycles-pp.vma_modify_flags
      0.28 ±  8%      -0.0        0.24 ±  5%  perf-profile.children.cycles-pp.__perf_sw_event
      0.15 ±  7%      -0.0        0.12 ±  4%  perf-profile.children.cycles-pp.rep_stos_alternative
      0.14 ±  7%      -0.0        0.10 ±  9%  perf-profile.children.cycles-pp.sched_balance_domains
      0.14 ± 19%      -0.0        0.10 ±  8%  perf-profile.children.cycles-pp.__d_lookup_rcu
      0.18 ±  4%      -0.0        0.14 ±  6%  perf-profile.children.cycles-pp.__d_alloc
      0.07 ± 15%      -0.0        0.04 ± 71%  perf-profile.children.cycles-pp.delay_tsc
      0.24 ±  5%      -0.0        0.20 ±  5%  perf-profile.children.cycles-pp.mas_find
      0.22 ±  4%      -0.0        0.18 ±  2%  perf-profile.children.cycles-pp.dup_task_struct
      0.18 ±  2%      -0.0        0.15 ±  3%  perf-profile.children.cycles-pp.__x64_sys_close
      0.40 ±  9%      -0.0        0.36 ±  3%  perf-profile.children.cycles-pp.anon_vma_fork
      0.08 ±  8%      -0.0        0.05 ± 45%  perf-profile.children.cycles-pp.__get_next_timer_interrupt
      0.24 ±  2%      -0.0        0.20 ±  5%  perf-profile.children.cycles-pp.fstatat64
      0.17 ±  5%      -0.0        0.13 ±  2%  perf-profile.children.cycles-pp.shmem_file_write_iter
      0.16 ±  6%      -0.0        0.13 ±  5%  perf-profile.children.cycles-pp.vma_modify
      0.13 ±  2%      -0.0        0.10 ±  9%  perf-profile.children.cycles-pp.vma_interval_tree_insert
      0.14 ± 11%      -0.0        0.11 ±  7%  perf-profile.children.cycles-pp.ktime_get
      0.15 ±  7%      -0.0        0.11 ±  8%  perf-profile.children.cycles-pp.rep_movs_alternative
      0.17 ±  7%      -0.0        0.14 ±  5%  perf-profile.children.cycles-pp.__get_unmapped_area
      0.22 ±  5%      -0.0        0.18 ±  4%  perf-profile.children.cycles-pp.__slab_free
      0.14 ±  4%      -0.0        0.11 ± 12%  perf-profile.children.cycles-pp.sched_balance_update_blocked_averages
      0.14 ±  5%      -0.0        0.10 ±  4%  perf-profile.children.cycles-pp.step_into
      0.17 ±  7%      -0.0        0.14 ±  5%  perf-profile.children.cycles-pp.__fput
      0.21 ±  6%      -0.0        0.18 ±  4%  perf-profile.children.cycles-pp.copy_mc_enhanced_fast_string
      0.15 ±  6%      -0.0        0.12 ±  8%  perf-profile.children.cycles-pp.__wait_for_common
      0.14 ±  5%      -0.0        0.11 ±  6%  perf-profile.children.cycles-pp.wait_for_completion_state
      0.11 ±  6%      -0.0        0.08 ±  6%  perf-profile.children.cycles-pp.rcu_sched_clock_irq
      0.09 ±  5%      -0.0        0.06 ±  6%  perf-profile.children.cycles-pp.clockevents_program_event
      0.14 ±  5%      -0.0        0.11 ±  8%  perf-profile.children.cycles-pp.__libc_early_init
      0.13 ±  7%      -0.0        0.10 ±  5%  perf-profile.children.cycles-pp.enqueue_entity
      0.14 ±  6%      -0.0        0.11 ±  3%  perf-profile.children.cycles-pp.generic_perform_write
      0.12 ±  6%      -0.0        0.08 ±  5%  perf-profile.children.cycles-pp.get_user_pages_remote
      0.11 ±  8%      -0.0        0.08 ± 12%  perf-profile.children.cycles-pp.sched_balance_softirq
      0.19 ±  5%      -0.0        0.16 ±  2%  perf-profile.children.cycles-pp.__lruvec_stat_mod_folio
      0.18 ±  6%      -0.0        0.14 ±  5%  perf-profile.children.cycles-pp.task_tick_fair
      0.14 ±  5%      -0.0        0.12 ±  4%  perf-profile.children.cycles-pp.sched_move_task
      0.11 ±  6%      -0.0        0.08 ±  4%  perf-profile.children.cycles-pp.__get_user_pages
      0.15 ±  6%      -0.0        0.12 ±  5%  perf-profile.children.cycles-pp.schedule_timeout
      0.17 ±  5%      -0.0        0.14 ±  4%  perf-profile.children.cycles-pp.__do_wait
      0.12 ±  7%      -0.0        0.09 ±  4%  perf-profile.children.cycles-pp.perf_rotate_context
      0.16 ±  9%      -0.0        0.13 ±  2%  perf-profile.children.cycles-pp.flush_tlb_mm_range
      0.21 ±  6%      -0.0        0.18 ±  3%  perf-profile.children.cycles-pp.vma_merge_new_range
      0.14 ±  4%      -0.0        0.11 ±  6%  perf-profile.children.cycles-pp.kmem_cache_alloc_lru_noprof
      0.15 ±  6%      -0.0        0.12 ±  7%  perf-profile.children.cycles-pp.pipe_read
      0.06 ±  6%      -0.0        0.03 ± 70%  perf-profile.children.cycles-pp.strnlen_user
      0.14 ±  7%      -0.0        0.11 ±  5%  perf-profile.children.cycles-pp.vma_prepare
      0.15 ±  3%      -0.0        0.12 ±  6%  perf-profile.children.cycles-pp.__x64_sys_munmap
      0.16 ±  4%      -0.0        0.13 ±  8%  perf-profile.children.cycles-pp.up_write
      0.25 ±  4%      -0.0        0.22 ±  4%  perf-profile.children.cycles-pp.__x64_sys_vfork
      0.13 ±  5%      -0.0        0.10 ±  5%  perf-profile.children.cycles-pp.dequeue_entity
      0.14 ±  5%      -0.0        0.11 ±  4%  perf-profile.children.cycles-pp.setup_arg_pages
      0.11 ±  6%      -0.0        0.08 ±  7%  perf-profile.children.cycles-pp.__mem_cgroup_charge
      0.12 ±  5%      -0.0        0.10 ±  8%  perf-profile.children.cycles-pp.__rseq_handle_notify_resume
      0.12 ±  7%      -0.0        0.10 ±  7%  perf-profile.children.cycles-pp.arch_get_unmapped_area_topdown
      0.15 ±  7%      -0.0        0.13 ±  5%  perf-profile.children.cycles-pp.try_to_wake_up
      0.10 ±  4%      -0.0        0.08 ±  4%  perf-profile.children.cycles-pp.update_rq_clock_task
      0.12 ±  7%      -0.0        0.10 ±  4%  perf-profile.children.cycles-pp.wait_task_zombie
      0.15 ±  4%      -0.0        0.13 ±  6%  perf-profile.children.cycles-pp.__x64_sys_unlink
      0.18 ±  4%      -0.0        0.16 ±  6%  perf-profile.children.cycles-pp.sock_write_iter
      0.15 ±  4%      -0.0        0.12 ±  3%  perf-profile.children.cycles-pp._raw_spin_lock_irqsave
      0.08 ±  8%      -0.0        0.06        perf-profile.children.cycles-pp.copy_string_kernel
      0.10 ±  5%      -0.0        0.07 ±  8%  perf-profile.children.cycles-pp.do_dentry_open
      0.09            -0.0        0.06 ± 11%  perf-profile.children.cycles-pp.enqueue_hrtimer
      0.07 ±  5%      -0.0        0.04 ± 45%  perf-profile.children.cycles-pp.read_tsc
      0.12 ±  3%      -0.0        0.09 ±  5%  perf-profile.children.cycles-pp.begin_new_exec
      0.11 ±  7%      -0.0        0.08 ±  5%  perf-profile.children.cycles-pp.folio_add_file_rmap_ptes
      0.12 ±  6%      -0.0        0.09        perf-profile.children.cycles-pp.relocate_vma_down
      0.10 ± 10%      -0.0        0.07 ±  5%  perf-profile.children.cycles-pp.free_unref_folios
      0.10 ± 10%      -0.0        0.07 ±  9%  perf-profile.children.cycles-pp.irq_enter_rcu
      0.11 ±  4%      -0.0        0.08 ±  5%  perf-profile.children.cycles-pp.vfs_open
      0.07 ± 10%      -0.0        0.04 ± 45%  perf-profile.children.cycles-pp.free_unref_page
      0.06 ±  7%      -0.0        0.04 ± 44%  perf-profile.children.cycles-pp.sched_clock_cpu
      0.10 ±  9%      -0.0        0.08 ±  8%  perf-profile.children.cycles-pp.__vmf_anon_prepare
      0.15 ±  5%      -0.0        0.12 ±  6%  perf-profile.children.cycles-pp.do_unlinkat
      0.13 ±  6%      -0.0        0.10 ±  4%  perf-profile.children.cycles-pp.mas_wr_bnode
      0.10 ±  6%      -0.0        0.08 ±  8%  perf-profile.children.cycles-pp.release_task
      0.10 ±  7%      -0.0        0.08 ±  7%  perf-profile.children.cycles-pp.sched_exec
      0.10 ±  8%      -0.0        0.08 ±  6%  perf-profile.children.cycles-pp.inode_permission
      0.08 ± 10%      -0.0        0.06 ±  6%  perf-profile.children.cycles-pp.irqtime_account_irq
      0.08 ±  9%      -0.0        0.06 ±  9%  perf-profile.children.cycles-pp.pgd_alloc
      0.08 ±  6%      -0.0        0.06 ± 13%  perf-profile.children.cycles-pp.timerqueue_add
      0.08 ±  4%      -0.0        0.06 ±  8%  perf-profile.children.cycles-pp.timerqueue_del
      0.11 ± 10%      -0.0        0.08 ±  5%  perf-profile.children.cycles-pp.__vm_area_free
      0.18 ±  7%      -0.0        0.16 ±  4%  perf-profile.children.cycles-pp.__wp_page_copy_user
      0.10 ±  8%      -0.0        0.07 ±  6%  perf-profile.children.cycles-pp.do_open_execat
      0.09 ±  8%      -0.0        0.06 ±  7%  perf-profile.children.cycles-pp.get_free_pages_noprof
      0.11 ±  5%      -0.0        0.09 ±  7%  perf-profile.children.cycles-pp.rseq_ip_fixup
      0.09 ±  7%      -0.0        0.07 ±  9%  perf-profile.children.cycles-pp.vm_unmapped_area
      0.08 ± 10%      -0.0        0.06 ± 15%  perf-profile.children.cycles-pp.cgroup_rstat_updated
      0.08 ± 11%      -0.0        0.06 ± 11%  perf-profile.children.cycles-pp.tick_irq_enter
      0.10 ± 11%      -0.0        0.08 ±  8%  perf-profile.children.cycles-pp.memcpy_orig
      0.12 ±  7%      -0.0        0.10        perf-profile.children.cycles-pp.alloc_thread_stack_node
      0.08 ±  9%      -0.0        0.06 ±  8%  perf-profile.children.cycles-pp.native_sched_clock
      0.07 ±  9%      -0.0        0.05 ±  7%  perf-profile.children.cycles-pp.__call_rcu_common
      0.07 ±  5%      -0.0        0.05 ±  7%  perf-profile.children.cycles-pp.perf_event_task_output
      0.11 ±  3%      -0.0        0.09 ±  4%  perf-profile.children.cycles-pp.sync
      0.12 ±  7%      -0.0        0.10 ±  3%  perf-profile.children.cycles-pp.arch_do_signal_or_restart
      0.15 ±  3%      -0.0        0.13 ±  5%  perf-profile.children.cycles-pp.folio_batch_move_lru
      0.10 ±  5%      -0.0        0.08        perf-profile.children.cycles-pp.handle_pte_fault
      0.14 ±  2%      -0.0        0.12 ±  3%  perf-profile.children.cycles-pp.vm_area_alloc
      0.07 ±  6%      -0.0        0.06 ±  9%  perf-profile.children.cycles-pp.update_irq_load_avg
      0.09 ±  6%      -0.0        0.07 ±  9%  perf-profile.children.cycles-pp.__put_user_8
      0.07            -0.0        0.05 ±  7%  perf-profile.children.cycles-pp._raw_spin_lock_irq
      0.09 ±  5%      -0.0        0.08 ±  6%  perf-profile.children.cycles-pp.getname_flags
      0.07 ±  8%      -0.0        0.05 ±  7%  perf-profile.children.cycles-pp.unlink_file_vma_batch_final
      0.09 ±  6%      -0.0        0.07 ±  9%  perf-profile.children.cycles-pp.vma_link_file
      0.07 ± 14%      -0.0        0.05 ±  8%  perf-profile.children.cycles-pp.fput
      0.08 ± 10%      -0.0        0.06 ±  6%  perf-profile.children.cycles-pp.shmem_write_begin
      0.06            -0.0        0.04 ± 44%  perf-profile.children.cycles-pp.terminate_walk
      0.10 ±  4%      -0.0        0.08 ±  5%  perf-profile.children.cycles-pp.__x64_sys_sync
      0.12 ±  4%      -0.0        0.10 ±  3%  perf-profile.children.cycles-pp.kill_something_info
      0.10 ±  4%      -0.0        0.08 ±  5%  perf-profile.children.cycles-pp.ksys_sync
      0.08 ± 13%      -0.0        0.06 ±  7%  perf-profile.children.cycles-pp.task_work_run
      0.13            -0.0        0.11 ±  6%  perf-profile.children.cycles-pp.try_to_block_task
      0.07 ± 15%      -0.0        0.05 ±  7%  perf-profile.children.cycles-pp.d_path
      0.07 ±  5%      -0.0        0.05 ±  7%  perf-profile.children.cycles-pp.exec_mmap
      0.07 ± 10%      -0.0        0.05 ±  7%  perf-profile.children.cycles-pp.free_unref_page_commit
      0.10 ±  4%      -0.0        0.09 ±  5%  perf-profile.children.cycles-pp.__percpu_counter_init_many
      0.15 ±  3%      -0.0        0.13 ±  3%  perf-profile.children.cycles-pp.lru_add_drain_cpu
      0.09 ±  8%      -0.0        0.07        perf-profile.children.cycles-pp.shmem_get_folio_gfp
      0.09 ±  4%      -0.0        0.07 ±  9%  perf-profile.children.cycles-pp.try_to_unlazy
      0.08 ±  4%      -0.0        0.06 ±  7%  perf-profile.children.cycles-pp.vma_interval_tree_remove
      0.15 ±  4%      -0.0        0.13 ±  3%  perf-profile.children.cycles-pp.lru_add_drain
      0.08 ±  9%      -0.0        0.06 ±  6%  perf-profile.children.cycles-pp.__pud_alloc
      0.07 ± 10%      -0.0        0.05 ±  8%  perf-profile.children.cycles-pp.create_elf_tables
      0.08 ±  6%      -0.0        0.06 ±  6%  perf-profile.children.cycles-pp.entry_SYSCALL_64_safe_stack
      0.08 ±  8%      -0.0        0.06 ±  7%  perf-profile.children.cycles-pp.shmem_mknod
      0.09 ± 10%      -0.0        0.07 ±  6%  perf-profile.children.cycles-pp.__page_cache_release
      0.08 ±  6%      -0.0        0.06 ±  6%  perf-profile.children.cycles-pp.__memcg_kmem_charge_page
      0.07 ± 11%      -0.0        0.05 ±  8%  perf-profile.children.cycles-pp.pipe_write
      0.20 ±  5%      +0.0        0.22 ±  2%  perf-profile.children.cycles-pp.__close
      0.10 ±  5%      +0.0        0.12 ±  6%  perf-profile.children.cycles-pp.__local_bh_enable_ip
      0.24 ±  5%      +0.0        0.28 ±  3%  perf-profile.children.cycles-pp.__libc_fork
      0.37 ±  4%      +0.0        0.40 ±  2%  perf-profile.children.cycles-pp.read_write_close
      0.09 ±  7%      +0.0        0.12 ±  5%  perf-profile.children.cycles-pp.do_softirq
      0.50 ±  3%      +0.0        0.54        perf-profile.children.cycles-pp.creat64
      0.52 ±  5%      +0.0        0.56 ±  2%  perf-profile.children.cycles-pp.page_test
      0.81 ±  4%      +0.0        0.86 ±  2%  perf-profile.children.cycles-pp.brk
      2.19 ±  2%      +0.1        2.29        perf-profile.children.cycles-pp._Fork
      1.10 ± 31%      +0.5        1.56 ± 11%  perf-profile.children.cycles-pp._raw_spin_lock
      0.73 ± 43%      +0.5        1.24 ± 13%  perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath
      0.92 ± 38%      +0.5        1.43 ± 12%  perf-profile.children.cycles-pp.dcache_readdir
      0.92 ± 37%      +0.5        1.43 ± 12%  perf-profile.children.cycles-pp.__x64_sys_getdents64
      0.92 ± 37%      +0.5        1.43 ± 12%  perf-profile.children.cycles-pp.iterate_dir
      0.81 ± 42%      +0.5        1.34 ± 13%  perf-profile.children.cycles-pp.scan_positives
      0.80 ± 38%      +0.6        1.44 ± 12%  perf-profile.children.cycles-pp.getdents64
     66.36            +5.5       71.91        perf-profile.children.cycles-pp.sieve
     69.78            +5.8       75.54        perf-profile.children.cycles-pp.main
     69.74            +5.8       75.52        perf-profile.children.cycles-pp.multiuser
     69.73            +5.8       75.51        perf-profile.children.cycles-pp.runloop
      6.29 ±  2%      -0.6        5.66 ±  2%  perf-profile.self.cycles-pp.intel_idle
      0.88 ±  2%      -0.2        0.68 ±  3%  perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack
      0.67 ±  5%      -0.1        0.54 ±  2%  perf-profile.self.cycles-pp.__irqentry_text_end
      0.56 ±  3%      -0.1        0.45 ±  2%  perf-profile.self.cycles-pp.zap_present_ptes
      0.41 ±  5%      -0.1        0.32 ±  2%  perf-profile.self.cycles-pp.syscall_return_via_sysret
      0.48 ±  5%      -0.1        0.39 ±  3%  perf-profile.self.cycles-pp.next_uptodate_folio
      0.38 ±  6%      -0.1        0.29 ±  3%  perf-profile.self.cycles-pp.clear_page_erms
      0.40 ±  4%      -0.1        0.32 ±  4%  perf-profile.self.cycles-pp.entry_SYSCALL_64
      0.26 ±  5%      -0.1        0.19 ±  3%  perf-profile.self.cycles-pp.copy_present_ptes
      0.42 ±  9%      -0.1        0.35        perf-profile.self.cycles-pp._raw_spin_lock
      0.31 ±  4%      -0.1        0.25 ±  4%  perf-profile.self.cycles-pp.__memcg_slab_post_alloc_hook
      0.38 ±  6%      -0.1        0.32 ±  5%  perf-profile.self.cycles-pp.filemap_map_pages
      0.26 ±  5%      -0.1        0.21 ±  2%  perf-profile.self.cycles-pp.native_irq_return_iret
      0.20 ±  9%      -0.0        0.15 ±  5%  perf-profile.self.cycles-pp.mas_walk
      0.24 ±  5%      -0.0        0.20 ±  5%  perf-profile.self.cycles-pp.folio_remove_rmap_ptes
      0.28 ±  4%      -0.0        0.24 ±  3%  perf-profile.self.cycles-pp.kmem_cache_alloc_noprof
      0.24 ±  9%      -0.0        0.20 ±  3%  perf-profile.self.cycles-pp.___perf_sw_event
      0.20 ±  4%      -0.0        0.16 ±  2%  perf-profile.self.cycles-pp.error_entry
      0.12 ±  6%      -0.0        0.09 ±  5%  perf-profile.self.cycles-pp.menu_select
      0.14 ± 19%      -0.0        0.10 ±  6%  perf-profile.self.cycles-pp.__d_lookup_rcu
      0.23 ±  5%      -0.0        0.19 ±  6%  perf-profile.self.cycles-pp.update_sg_lb_stats
      0.20 ±  4%      -0.0        0.17 ±  4%  perf-profile.self.cycles-pp.update_sg_wakeup_stats
      0.21 ±  6%      -0.0        0.17 ±  6%  perf-profile.self.cycles-pp.__memcg_slab_free_hook
      0.21 ±  8%      -0.0        0.18 ±  5%  perf-profile.self.cycles-pp.kmem_cache_free
      0.13 ±  4%      -0.0        0.10 ±  8%  perf-profile.self.cycles-pp.vma_interval_tree_insert
      0.19 ±  3%      -0.0        0.16 ±  6%  perf-profile.self.cycles-pp.folios_put_refs
      0.20 ±  7%      -0.0        0.16 ±  7%  perf-profile.self.cycles-pp.__handle_mm_fault
      0.21 ±  6%      -0.0        0.18 ±  5%  perf-profile.self.cycles-pp.__slab_free
      0.20 ±  6%      -0.0        0.17 ±  4%  perf-profile.self.cycles-pp.copy_mc_enhanced_fast_string
      0.16 ±  7%      -0.0        0.13 ±  5%  perf-profile.self.cycles-pp.down_write
      0.16 ±  9%      -0.0        0.13 ±  3%  perf-profile.self.cycles-pp.__mod_memcg_lruvec_state
      0.15 ±  7%      -0.0        0.12 ±  6%  perf-profile.self.cycles-pp.up_write
      0.10 ±  9%      -0.0        0.07 ±  5%  perf-profile.self.cycles-pp.cpuidle_enter_state
      0.21 ±  4%      -0.0        0.18 ±  5%  perf-profile.self.cycles-pp.memset_orig
      0.15 ±  7%      -0.0        0.12 ±  7%  perf-profile.self.cycles-pp.zap_pte_range
      0.16 ±  3%      -0.0        0.13 ±  5%  perf-profile.self.cycles-pp.free_pages_and_swap_cache
      0.12 ±  9%      -0.0        0.09 ± 10%  perf-profile.self.cycles-pp.rep_movs_alternative
      0.10 ±  6%      -0.0        0.07 ±  9%  perf-profile.self.cycles-pp.update_load_avg
      0.10 ±  4%      -0.0        0.08 ±  6%  perf-profile.self.cycles-pp.folio_add_file_rmap_ptes
      0.08 ±  9%      -0.0        0.06 ± 13%  perf-profile.self.cycles-pp.cgroup_rstat_updated
      0.08 ±  4%      -0.0        0.06 ± 11%  perf-profile.self.cycles-pp.mas_wr_node_store
      0.09 ±  9%      -0.0        0.07 ±  5%  perf-profile.self.cycles-pp.__cond_resched
      0.08 ±  7%      -0.0        0.06 ±  6%  perf-profile.self.cycles-pp.set_pte_range
      0.08 ±  8%      -0.0        0.06 ±  6%  perf-profile.self.cycles-pp.vma_interval_tree_remove
      0.11 ±  7%      -0.0        0.09 ±  5%  perf-profile.self.cycles-pp._raw_spin_lock_irqsave
      0.07 ±  6%      -0.0        0.06 ±  6%  perf-profile.self.cycles-pp.percpu_counter_add_batch
      0.07 ±  7%      -0.0        0.05 ±  7%  perf-profile.self.cycles-pp._raw_spin_lock_irq
      0.07 ±  7%      -0.0        0.05 ±  8%  perf-profile.self.cycles-pp.update_irq_load_avg
      0.06 ±  7%      -0.0        0.05 ±  8%  perf-profile.self.cycles-pp.__task_pid_nr_ns
      0.72 ± 43%      +0.5        1.23 ± 13%  perf-profile.self.cycles-pp.native_queued_spin_lock_slowpath
     65.88            +5.5       71.36        perf-profile.self.cycles-pp.sieve
      0.06 ± 62%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.__handle_mm_fault
      0.07 ± 25%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.copy_p4d_range
      0.12 ± 31%     -95.3%       0.01 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.folio_alloc_mpol_noprof.shmem_alloc_folio
      0.05 ± 72%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.get_free_pages_noprof.pgd_alloc
      0.07 ± 14%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.pte_alloc_one.__pte_alloc
      0.10 ± 81%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page.__handle_mm_fault
      0.06 ± 81%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_fault.__handle_mm_fault
      0.10 ± 83%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__dentry_kill.dput.step_into.link_path_walk
      0.03 ± 63%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__get_user_pages.get_user_pages_remote.get_arg_page.copy_strings
      0.12 ± 44%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_cache_node_noprof.__get_vm_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node
      0.08 ± 27%     -97.4%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_cache_noprof.perf_event_mmap_event.perf_event_mmap.mmap_region
      0.02 ± 35%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_noprof.load_elf_phdrs.load_elf_binary.search_binary_handler
      0.13 ± 60%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_prepare_creds.prepare_creds
      0.08 ± 28%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_task_alloc.copy_process
      0.08 ± 75%     -86.8%       0.01 ±187%  perf-sched.sch_delay.avg.ms.__cond_resched.__kmalloc_noprof.security_inode_init_security.shmem_mknod.lookup_open
      0.05 ± 26%     -96.9%       0.00 ±142%  perf-sched.sch_delay.avg.ms.__cond_resched.__put_anon_vma.unlink_anon_vmas.free_pgtables.exit_mmap
      0.17 ± 57%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__release_sock.release_sock.tcp_sendmsg.sock_write_iter
      0.07 ± 10%     -99.5%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.exit_mmap.mmput
      0.05 ± 58%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.relocate_vma_down.setup_arg_pages
      0.08 ± 50%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.__vmalloc_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node.dup_task_struct
      0.06 ±  8%   +2931.0%       1.82 ±  4%  perf-sched.sch_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
      0.10 ± 28%     -86.9%       0.01 ±144%  perf-sched.sch_delay.avg.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__x64_sys_vfork
      0.15 ± 54%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.aa_sk_perm.security_socket_recvmsg.sock_recvmsg.sock_read_iter
      0.14 ± 33%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.aa_sk_perm.security_socket_sendmsg.sock_write_iter.vfs_write
      0.06 ± 46%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.apparmor_file_alloc_security.security_file_alloc.init_file.alloc_empty_file
      0.10 ± 57%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.cgroup_css_set_fork.cgroup_can_fork.copy_process.kernel_clone
      0.12 ± 61%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.change_pmd_range.isra.0.change_pud_range
      0.06 ± 41%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.change_pud_range.isra.0.change_protection_range
      0.10 ± 34%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.copy_page_range.dup_mmap.dup_mm.constprop
      0.07 ± 64%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.copy_strings.isra.0.do_execveat_common
      0.15 ± 85%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.count.constprop.0.isra
      0.06 ± 53%     -97.1%       0.00 ±145%  perf-sched.sch_delay.avg.ms.__cond_resched.down_read.acct_collect.do_exit.do_group_exit
      0.10 ± 23%     -97.3%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.down_read.walk_component.link_path_walk.part
      0.11 ± 50%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_read.walk_component.path_lookupat.filename_lookup
      0.13 ±132%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page
      0.04 ±122%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.anon_vma_clone.__split_vma.vma_modify
      0.09 ± 18%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.anon_vma_clone.anon_vma_fork.dup_mmap
      0.08 ± 24%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.anon_vma_fork.dup_mmap.dup_mm
      0.16 ± 49%     -89.1%       0.02 ±184%  perf-sched.sch_delay.avg.ms.__cond_resched.down_write.do_truncate.do_open.path_openat
      0.08 ± 16%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.free_pgtables.exit_mmap.mmput
      0.08 ± 41%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.mmap_region.do_mmap.vm_mmap_pgoff
      0.02 ± 46%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.shmem_file_write_iter.vfs_write.ksys_write
      0.10 ± 62%     -99.0%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.down_write.unlink_anon_vmas.free_pgtables.exit_mmap
      0.07 ± 36%     -96.1%       0.00 ±155%  perf-sched.sch_delay.avg.ms.__cond_resched.down_write.vma_link_file.mmap_region.do_mmap
      0.11 ±150%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.vma_prepare.__split_vma.vma_modify
      0.11 ± 45%     -76.5%       0.02 ±173%  perf-sched.sch_delay.avg.ms.__cond_resched.down_write.vma_prepare.commit_merge.vma_expand
      0.14 ± 67%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write.vms_gather_munmap_vmas.do_vmi_align_munmap.do_vmi_munmap
      0.05 ±120%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.create_elf_tables.load_elf_binary.search_binary_handler
      0.02 ± 16%     -68.9%       0.01 ±112%  perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.exec_mmap.begin_new_exec.load_elf_binary
      0.05 ± 51%     -97.5%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.setup_arg_pages.load_elf_binary.search_binary_handler
      0.03 ± 89%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.vm_brk_flags.elf_load.load_elf_binary
      0.06 ± 58%     -98.7%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_binary
      0.10 ± 33%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.ksys_mmap_pgoff.do_syscall_64
      0.14 ± 70%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.dput.do_linkat.__x64_sys_link.do_syscall_64
      0.09 ±110%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.dput.path_put.exit_fs.do_exit
      0.14 ± 33%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.dput.path_put.vfs_statx.vfs_fstatat
      0.08 ± 17%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.dput.step_into.link_path_walk.part
      0.13 ± 65%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat
      0.11 ± 80%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.dput.step_into.path_lookupat.filename_lookup
      0.14 ± 73%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.dput.terminate_walk.path_lookupat.filename_lookup
      0.04 ±130%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.exit_signals.do_exit.do_group_exit.__x64_sys_exit_group
      0.05 ± 54%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.filemap_read.__kernel_read.search_binary_handler.exec_binprm
      0.14 ± 20%     -87.1%       0.02 ±150%  perf-sched.sch_delay.avg.ms.__cond_resched.generic_perform_write.shmem_file_write_iter.vfs_write.ksys_write
      0.09 ± 41%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_lru_noprof.__d_alloc.d_alloc.d_alloc_parallel
      0.07 ± 39%     -93.0%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_node_noprof.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb
      0.11 ± 72%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_node_noprof.dup_task_struct.copy_process.kernel_clone
      0.10 ± 35%     -93.8%       0.01 ±182%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_pid.copy_process.kernel_clone
      0.10 ± 25%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.anon_vma_fork.dup_mmap.dup_mm
      0.09 ± 37%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.copy_fs_struct.copy_process.kernel_clone
      0.07 ± 92%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.copy_signal.copy_process.kernel_clone
      0.05 ± 54%     -96.3%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.dup_mm.constprop.0
      0.14 ± 88%     -97.8%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.__split_vma
      0.08 ± 25%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.mmap_region
      0.08 ±121%     -99.2%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.vma_link
      0.09 ± 18%     -93.4%       0.01 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.prepare_creds.copy_creds.copy_process
      0.08 ± 49%     -90.9%       0.01 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.security_inode_alloc.inode_init_always.alloc_inode
      0.03 ±106%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.__bprm_mm_init.alloc_bprm
      0.07 ± 85%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc._install_special_mapping.map_vdso
      0.10 ± 72%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.do_brk_flags.__do_sys_brk
      0.09 ± 77%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vma_modify
      0.08 ± 41%     -91.9%       0.01 ±148%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vms_gather_munmap_vmas
      0.09 ± 11%     -92.8%       0.01 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.dup_mmap.dup_mm
      0.11 ±112%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.mmput.exec_mmap.begin_new_exec.load_elf_binary
      0.09 ± 96%     -97.5%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.mnt_want_write.do_unlinkat.__x64_sys_unlink.do_syscall_64
      0.08 ± 74%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.mnt_want_write.open_last_lookups.path_openat.do_filp_open
      0.02 ± 56%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock.futex_exec_release.exec_mm_release.exec_mmap
      0.06 ± 49%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock.futex_exit_release.exit_mm_release.exit_mm
      0.11 ± 68%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock.pipe_read.vfs_read.ksys_read
      0.06 ± 36%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.__percpu_counter_init_many.mm_init
      0.02 ± 63%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.alloc_bprm
      0.07 ± 24%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.dup_mm
      0.09 ± 39%     -99.5%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.put_files_struct.do_exit.do_group_exit.__x64_sys_exit_group
      0.06 ± 24%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.remove_vma.exit_mmap.mmput.exit_mm
      0.24 ±211%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.do_vmi_align_munmap.do_vmi_munmap
      0.08 ± 68%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.mmap_region.do_mmap
      0.09 ± 90%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.shmem_get_folio_gfp.shmem_file_read_iter.vfs_read.ksys_read
      0.08 ±118%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.shmem_get_folio_gfp.shmem_write_begin.generic_perform_write.shmem_file_write_iter
      0.14 ± 72%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.shmem_undo_range.shmem_evict_inode.evict.__dentry_kill
      0.06 ±  6%    +217.5%       0.19 ± 16%  perf-sched.sch_delay.avg.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part
      0.11 ± 30%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.super_lock.iterate_supers.ksys_sync.__x64_sys_sync
      0.15 ±162%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.switch_task_namespaces.do_exit.do_group_exit.__x64_sys_exit_group
      0.06 ± 71%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.task_work_run.do_exit.do_group_exit.__x64_sys_exit_group
      0.18 ± 88%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.task_work_run.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.06 ± 30%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.unmap_vmas.exit_mmap.mmput.exit_mm
      0.07 ± 60%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_clean_up_area.mmap_region
      0.13 ± 73%     -99.3%       0.00 ±223%  perf-sched.sch_delay.avg.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_complete_munmap_vmas.mmap_region
      0.09 ± 27%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.__cond_resched.uprobe_start_dup_mmap.dup_mmap.dup_mm.constprop
      0.12           +18.7%       0.15        perf-sched.sch_delay.avg.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
      0.09 ± 16%     -44.7%       0.05 ± 57%  perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown]
      0.12 ±  4%     -23.5%       0.10 ± 15%  perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown]
      0.09 ± 24%     -97.0%       0.00 ±223%  perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown]
      0.11 ± 14%     -93.8%       0.01 ±182%  perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown].[unknown]
      0.09 ± 26%     -92.4%       0.01 ±223%  perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown]
      0.14 ±  2%     -42.0%       0.08 ± 37%  perf-sched.sch_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown]
      0.06 ±  2%     +19.8%       0.08 ±  3%  perf-sched.sch_delay.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
      0.06 ± 12%    +188.2%       0.17 ± 40%  perf-sched.sch_delay.avg.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait
      0.01 ± 72%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.unlink_file_vma_batch_final
      0.08 ±  2%     +74.7%       0.13 ±  5%  perf-sched.sch_delay.avg.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
      0.02 ± 22%   +9365.4%       2.15 ± 43%  perf-sched.sch_delay.avg.ms.schedule_timeout.io_schedule_timeout.__wait_for_common.blk_execute_rq
      0.07 ± 28%    +480.6%       0.39 ± 57%  perf-sched.sch_delay.avg.ms.schedule_timeout.kcompactd.kthread.ret_from_fork
      0.04 ±  2%    +417.9%       0.22 ±  5%  perf-sched.sch_delay.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
      0.02 ±  6%   +4557.3%       0.91        perf-sched.sch_delay.avg.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      0.04 ± 36%     -97.3%       0.00 ±223%  perf-sched.sch_delay.avg.ms.syscall_exit_to_user_mode.ret_from_fork_asm.[unknown]
      0.10 ± 35%    -100.0%       0.00        perf-sched.sch_delay.avg.ms.syscall_exit_to_user_mode.ret_from_fork_asm.[unknown].[unknown]
      0.05 ±  4%   +2256.8%       1.22 ±  3%  perf-sched.sch_delay.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.27 ± 80%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.__handle_mm_fault
      0.26 ± 19%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.copy_p4d_range
      0.29 ± 38%     -98.0%       0.01 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.folio_alloc_mpol_noprof.shmem_alloc_folio
      0.10 ± 80%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.get_free_pages_noprof.pgd_alloc
      0.41 ± 34%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.pte_alloc_one.__pte_alloc
      0.47 ±105%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page.__handle_mm_fault
      0.17 ± 90%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_fault.__handle_mm_fault
      0.31 ± 74%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__dentry_kill.dput.step_into.link_path_walk
      0.49 ± 69%     -94.2%       0.03 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.__do_fault.do_read_fault.do_fault.__handle_mm_fault
      0.07 ± 73%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__get_user_pages.get_user_pages_remote.get_arg_page.copy_strings
      0.22 ± 23%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_cache_node_noprof.__get_vm_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node
      0.57 ± 42%     -99.6%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_cache_noprof.perf_event_mmap_event.perf_event_mmap.mmap_region
      0.07 ± 31%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_noprof.load_elf_phdrs.load_elf_binary.search_binary_handler
      0.49 ± 60%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_prepare_creds.prepare_creds
      0.21 ± 36%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_task_alloc.copy_process
      0.13 ± 82%     -91.7%       0.01 ±187%  perf-sched.sch_delay.max.ms.__cond_resched.__kmalloc_noprof.security_inode_init_security.shmem_mknod.lookup_open
      0.66 ± 41%     -99.8%       0.00 ±142%  perf-sched.sch_delay.max.ms.__cond_resched.__put_anon_vma.unlink_anon_vmas.free_pgtables.exit_mmap
      0.23 ± 37%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__release_sock.release_sock.tcp_sendmsg.sock_write_iter
      1.38 ± 46%    -100.0%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.exit_mmap.mmput
      0.19 ± 84%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.relocate_vma_down.setup_arg_pages
      0.25 ± 60%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.__vmalloc_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node.dup_task_struct
      2.77 ± 42%    +823.0%      25.54 ±  6%  perf-sched.sch_delay.max.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
      1.08 ± 29%     -98.8%       0.01 ±144%  perf-sched.sch_delay.max.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__x64_sys_vfork
      0.37 ± 65%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.aa_sk_perm.security_socket_recvmsg.sock_recvmsg.sock_read_iter
      0.28 ± 51%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.aa_sk_perm.security_socket_sendmsg.sock_write_iter.vfs_write
      0.19 ± 60%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.apparmor_file_alloc_security.security_file_alloc.init_file.alloc_empty_file
      0.59 ±149%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.cgroup_css_set_fork.cgroup_can_fork.copy_process.kernel_clone
      0.18 ± 70%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.change_pmd_range.isra.0.change_pud_range
      0.41 ± 43%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.change_pud_range.isra.0.change_protection_range
      0.22 ± 35%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.copy_page_range.dup_mmap.dup_mm.constprop
      0.82 ± 30%     -95.8%       0.03 ±207%  perf-sched.sch_delay.max.ms.__cond_resched.copy_pte_range.copy_p4d_range.copy_page_range.dup_mmap
      0.50 ± 54%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.copy_strings.isra.0.do_execveat_common
      0.34 ± 93%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.count.constprop.0.isra
      0.56 ± 64%     -99.7%       0.00 ±145%  perf-sched.sch_delay.max.ms.__cond_resched.down_read.acct_collect.do_exit.do_group_exit
      0.58 ± 40%     -99.5%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.down_read.walk_component.link_path_walk.part
      0.35 ± 85%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_read.walk_component.path_lookupat.filename_lookup
      0.19 ± 87%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page
      0.11 ±145%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.anon_vma_clone.__split_vma.vma_modify
      0.38 ± 21%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.anon_vma_clone.anon_vma_fork.dup_mmap
      0.22 ± 21%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.anon_vma_fork.dup_mmap.dup_mm
      0.87 ±110%     -97.0%       0.03 ±197%  perf-sched.sch_delay.max.ms.__cond_resched.down_write.do_truncate.do_open.path_openat
      0.48 ± 42%     -93.4%       0.03 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.down_write.dup_mmap.dup_mm.constprop
      1.03 ± 50%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.free_pgtables.exit_mmap.mmput
      0.27 ± 45%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.mmap_region.do_mmap.vm_mmap_pgoff
      0.02 ± 43%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.shmem_file_write_iter.vfs_write.ksys_write
      0.50 ± 48%     -99.8%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.down_write.unlink_anon_vmas.free_pgtables.exit_mmap
      0.37 ± 46%     -99.3%       0.00 ±155%  perf-sched.sch_delay.max.ms.__cond_resched.down_write.vma_link_file.mmap_region.do_mmap
      0.27 ±147%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.vma_prepare.__split_vma.vma_modify
      0.24 ± 83%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write.vms_gather_munmap_vmas.do_vmi_align_munmap.do_vmi_munmap
      0.17 ±154%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.create_elf_tables.load_elf_binary.search_binary_handler
      1.06 ± 27%     -99.2%       0.01 ±125%  perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.exec_mmap.begin_new_exec.load_elf_binary
      0.17 ± 52%     -99.2%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.setup_arg_pages.load_elf_binary.search_binary_handler
      0.07 ±120%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.vm_brk_flags.elf_load.load_elf_binary
      0.30 ± 71%     -99.7%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_binary
      0.41 ± 44%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.ksys_mmap_pgoff.do_syscall_64
      0.30 ± 55%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.dput.do_linkat.__x64_sys_link.do_syscall_64
      0.26 ± 79%     -88.4%       0.03 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.dput.open_last_lookups.path_openat.do_filp_open
      0.24 ±126%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.dput.path_put.exit_fs.do_exit
      0.44 ± 41%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.dput.path_put.vfs_statx.vfs_fstatat
      1.12 ± 34%     -84.3%       0.18 ± 53%  perf-sched.sch_delay.max.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir
      0.22 ± 45%     -82.5%       0.04 ±210%  perf-sched.sch_delay.max.ms.__cond_resched.dput.shmem_unlink.vfs_unlink.do_unlinkat
      0.61 ± 43%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.dput.step_into.link_path_walk.part
      0.23 ± 46%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat
      0.38 ± 97%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.dput.step_into.path_lookupat.filename_lookup
      0.33 ± 88%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.dput.terminate_walk.path_lookupat.filename_lookup
      0.91 ±116%     -88.9%       0.10 ± 95%  perf-sched.sch_delay.max.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open
      1.21 ± 37%     -94.3%       0.07 ±218%  perf-sched.sch_delay.max.ms.__cond_resched.exit_mmap.mmput.exit_mm.do_exit
      0.09 ±151%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.exit_signals.do_exit.do_group_exit.__x64_sys_exit_group
      0.28 ± 75%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.filemap_read.__kernel_read.search_binary_handler.exec_binprm
      0.89 ± 14%     -93.2%       0.06 ±146%  perf-sched.sch_delay.max.ms.__cond_resched.generic_perform_write.shmem_file_write_iter.vfs_write.ksys_write
      0.30 ± 35%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_lru_noprof.__d_alloc.d_alloc.d_alloc_parallel
      0.13 ± 63%     -96.3%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_node_noprof.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb
      0.19 ± 62%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_node_noprof.dup_task_struct.copy_process.kernel_clone
      0.31 ± 54%     -98.1%       0.01 ±182%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_pid.copy_process.kernel_clone
      0.98 ±112%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.anon_vma_fork.dup_mmap.dup_mm
      0.40 ± 59%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.copy_fs_struct.copy_process.kernel_clone
      0.08 ± 77%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.copy_signal.copy_process.kernel_clone
      0.09 ± 75%     -98.2%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.dup_mm.constprop.0
      0.51 ± 89%     -99.4%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.__split_vma
      0.51 ± 57%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.mmap_region
      0.13 ±102%     -99.5%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.vma_link
      0.58 ± 34%     -98.9%       0.01 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.prepare_creds.copy_creds.copy_process
      0.22 ± 61%     -96.7%       0.01 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.security_inode_alloc.inode_init_always.alloc_inode
      0.08 ± 77%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.__bprm_mm_init.alloc_bprm
      0.27 ±117%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc._install_special_mapping.map_vdso
      0.17 ± 80%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.do_brk_flags.__do_sys_brk
      0.93 ± 31%     -95.1%       0.05 ±219%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_alloc.mmap_region.do_mmap
      0.35 ± 62%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vma_modify
      0.29 ± 33%     -97.0%       0.01 ±159%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vms_gather_munmap_vmas
      0.64 ± 43%     -99.0%       0.01 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.dup_mmap.dup_mm
      0.18 ±105%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.mmput.exec_mmap.begin_new_exec.load_elf_binary
      0.51 ± 58%     -78.7%       0.11 ± 76%  perf-sched.sch_delay.max.ms.__cond_resched.mnt_want_write.do_open.path_openat.do_filp_open
      0.22 ± 97%     -99.0%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.mnt_want_write.do_unlinkat.__x64_sys_unlink.do_syscall_64
      0.12 ± 74%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.mnt_want_write.open_last_lookups.path_openat.do_filp_open
      0.03 ± 84%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock.futex_exec_release.exec_mm_release.exec_mmap
      0.25 ± 64%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock.futex_exit_release.exit_mm_release.exit_mm
      0.17 ± 73%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock.pipe_read.vfs_read.ksys_read
      0.65 ± 97%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.__percpu_counter_init_many.mm_init
      0.05 ± 98%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.alloc_bprm
      0.19 ± 39%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.dup_mm
      0.40 ± 43%     -99.9%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.put_files_struct.do_exit.do_group_exit.__x64_sys_exit_group
      0.28 ± 19%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.remove_vma.exit_mmap.mmput.exit_mm
      0.24 ±207%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.do_vmi_align_munmap.do_vmi_munmap
      0.25 ± 72%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.mmap_region.do_mmap
      0.18 ± 86%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.shmem_get_folio_gfp.shmem_file_read_iter.vfs_read.ksys_read
      0.24 ±144%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.shmem_get_folio_gfp.shmem_write_begin.generic_perform_write.shmem_file_write_iter
      0.26 ± 38%     -82.5%       0.05 ±136%  perf-sched.sch_delay.max.ms.__cond_resched.shmem_inode_acct_blocks.shmem_alloc_and_add_folio.shmem_get_folio_gfp.shmem_write_begin
      0.48 ± 77%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.shmem_undo_range.shmem_evict_inode.evict.__dentry_kill
      3.05 ± 27%   +1002.5%      33.63 ± 17%  perf-sched.sch_delay.max.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part
      0.34 ± 52%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.super_lock.iterate_supers.ksys_sync.__x64_sys_sync
      0.31 ±167%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.switch_task_namespaces.do_exit.do_group_exit.__x64_sys_exit_group
      0.10 ± 61%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.task_work_run.do_exit.do_group_exit.__x64_sys_exit_group
      0.26 ±123%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.task_work_run.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.34 ± 49%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.unmap_vmas.exit_mmap.mmput.exit_mm
      0.14 ± 72%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_clean_up_area.mmap_region
      0.36 ± 81%     -99.7%       0.00 ±223%  perf-sched.sch_delay.max.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_complete_munmap_vmas.mmap_region
      0.40 ± 37%    -100.0%       0.00        perf-sched.sch_delay.max.ms.__cond_resched.uprobe_start_dup_mmap.dup_mmap.dup_mm.constprop
      0.19 ± 43%     -86.5%       0.02 ±156%  perf-sched.sch_delay.max.ms.__cond_resched.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.58 ± 34%     -87.9%       0.07 ±213%  perf-sched.sch_delay.max.ms.__cond_resched.wp_page_copy.__handle_mm_fault.handle_mm_fault.do_user_addr_fault
      1.40 ± 31%     -88.9%       0.15 ±133%  perf-sched.sch_delay.max.ms.__cond_resched.zap_pmd_range.isra.0.unmap_page_range
      3.97 ± 32%    +285.0%      15.30 ± 31%  perf-sched.sch_delay.max.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
      1.01 ± 31%    +508.2%       6.14 ± 99%  perf-sched.sch_delay.max.ms.io_schedule.folio_wait_bit_common.filemap_fault.__do_fault
      1.06 ± 11%     -88.7%       0.12 ± 71%  perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown]
      0.43 ± 44%     -99.4%       0.00 ±223%  perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown]
      1.15 ± 14%     -99.3%       0.01 ±171%  perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown].[unknown]
      0.84 ± 51%     -99.1%       0.01 ±223%  perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown]
      1.59 ± 25%     -63.9%       0.57 ± 60%  perf-sched.sch_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown]
      3.05 ± 20%    +565.2%      20.27 ± 69%  perf-sched.sch_delay.max.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
      0.20 ± 55%   +2047.2%       4.20 ± 45%  perf-sched.sch_delay.max.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait
      0.19 ± 49%     -86.6%       0.03 ±151%  perf-sched.sch_delay.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.dup_mmap
      0.04 ±108%    -100.0%       0.00        perf-sched.sch_delay.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.unlink_file_vma_batch_final
      3.80 ± 32%    +567.5%      25.38 ± 20%  perf-sched.sch_delay.max.ms.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
      0.03 ± 24%  +15155.4%       4.96 ± 38%  perf-sched.sch_delay.max.ms.schedule_timeout.io_schedule_timeout.__wait_for_common.blk_execute_rq
      0.21 ± 92%   +1638.3%       3.70 ± 62%  perf-sched.sch_delay.max.ms.schedule_timeout.kcompactd.kthread.ret_from_fork
      0.78 ± 36%    +707.2%       6.33 ± 14%  perf-sched.sch_delay.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
      2.43 ± 29%    +226.7%       7.94 ± 11%  perf-sched.sch_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      2.52 ± 29%     -64.9%       0.89 ± 83%  perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      0.39 ± 65%     -99.7%       0.00 ±223%  perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.ret_from_fork_asm.[unknown]
      0.54 ± 43%    -100.0%       0.00        perf-sched.sch_delay.max.ms.syscall_exit_to_user_mode.ret_from_fork_asm.[unknown].[unknown]
      4.74 ± 91%    +283.4%      18.17 ± 25%  perf-sched.sch_delay.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.06          +218.9%       0.20        perf-sched.total_sch_delay.average.ms
      6.21 ± 60%    +512.8%      38.05 ± 19%  perf-sched.total_sch_delay.max.ms
     11.78 ±  6%     +19.6%      14.08        perf-sched.total_wait_and_delay.average.ms
     78829 ±  6%     -35.4%      50952        perf-sched.total_wait_and_delay.count.ms
     11.72 ±  6%     +18.5%      13.89        perf-sched.total_wait_time.average.ms
     13.85 ±  3%     +48.1%      20.52 ± 11%  perf-sched.wait_and_delay.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
     45.00 ± 63%    -100.0%       0.00        perf-sched.wait_and_delay.avg.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir
      0.06 ±  5%    +189.0%       0.19 ± 16%  perf-sched.wait_and_delay.avg.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part
    105.66 ±  3%     +16.2%     122.81 ±  3%  perf-sched.wait_and_delay.avg.ms.devkmsg_read.vfs_read.ksys_read.do_syscall_64
      3.49 ± 47%    -100.0%       0.00        perf-sched.wait_and_delay.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown]
     10.58 ±  7%     +29.6%      13.71        perf-sched.wait_and_delay.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
    424.15 ± 11%     +49.5%     633.95 ±  6%  perf-sched.wait_and_delay.avg.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll
     82.50 ± 68%     -75.1%      20.55 ± 90%  perf-sched.wait_and_delay.avg.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.do_unlinkat
     70.96 ±  5%     +13.0%      80.21        perf-sched.wait_and_delay.avg.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
     75.58 ±  8%     +29.3%      97.75 ±  4%  perf-sched.wait_and_delay.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
     70.17 ± 16%    -100.0%       0.00        perf-sched.wait_and_delay.count.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir
     18.67 ± 18%     -90.2%       1.83 ± 85%  perf-sched.wait_and_delay.count.__cond_resched.generic_perform_write.shmem_file_write_iter.vfs_write.ksys_write
    753.00 ±  5%     -99.9%       0.50 ±152%  perf-sched.wait_and_delay.count.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      2179 ±  6%     -36.1%       1393        perf-sched.wait_and_delay.count.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part
     21418 ±  6%     -29.7%      15061        perf-sched.wait_and_delay.count.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call
     21231 ±  7%     -30.8%      14692        perf-sched.wait_and_delay.count.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
     86.83 ± 10%     -99.2%       0.67 ±223%  perf-sched.wait_and_delay.count.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown]
      1590 ±  5%     -65.0%     556.33 ±  3%  perf-sched.wait_and_delay.count.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown].[unknown]
    792.33 ±  8%    -100.0%       0.00        perf-sched.wait_and_delay.count.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown]
      7701 ±  6%     -19.5%       6199        perf-sched.wait_and_delay.count.pipe_read.vfs_read.ksys_read.do_syscall_64
     77.83 ±  7%     -28.5%      55.67 ±  6%  perf-sched.wait_and_delay.count.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll
    129.67 ±  9%     -28.7%      92.50 ± 18%  perf-sched.wait_and_delay.count.schedule_preempt_disabled.rwsem_down_read_slowpath.down_read.walk_component
      6624 ±  5%     -18.5%       5402        perf-sched.wait_and_delay.count.schedule_timeout.__wait_for_common.wait_for_completion_state.kernel_clone
      5597 ±  6%     -34.1%       3690        perf-sched.wait_and_delay.count.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      2031 ± 11%     -99.5%      10.00 ±101%  perf-sched.wait_and_delay.count.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe.[unknown]
      2934 ±  7%     -36.3%       1867 ±  2%  perf-sched.wait_and_delay.count.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      1005          -100.0%       0.00        perf-sched.wait_and_delay.max.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir
      2375 ±  7%     -98.2%      43.16 ±148%  perf-sched.wait_and_delay.max.ms.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      4.43 ± 50%    +658.9%      33.63 ± 17%  perf-sched.wait_and_delay.max.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part
      1796 ± 22%     -35.8%       1152 ± 25%  perf-sched.wait_and_delay.max.ms.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call
      2200 ± 54%     -92.4%     167.58 ±223%  perf-sched.wait_and_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown]
    839.75 ± 44%    -100.0%       0.00        perf-sched.wait_and_delay.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown].[unknown]
      2744 ±  3%     -36.5%       1741 ± 15%  perf-sched.wait_and_delay.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      2864 ± 23%     -30.6%       1988 ± 11%  perf-sched.wait_and_delay.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.01 ± 60%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.__handle_mm_fault
      0.33 ± 18%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.copy_p4d_range
      0.17 ± 50%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.get_free_pages_noprof.pgd_alloc
      8.62 ±216%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.pte_alloc_one.__pte_alloc
      0.10 ± 75%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page.__handle_mm_fault
      0.09 ± 86%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__dentry_kill.dput.step_into.link_path_walk
      0.56 ± 76%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_cache_node_noprof.__get_vm_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node
      0.05 ± 33%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_cache_noprof.perf_event_mmap_event.perf_event_mmap.mmap_region
      0.01 ± 51%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_noprof.load_elf_phdrs.load_elf_binary.search_binary_handler
      0.33 ± 60%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_prepare_creds.prepare_creds
      0.56 ± 49%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_task_alloc.copy_process
      0.02 ± 71%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__put_anon_vma.unlink_anon_vmas.free_pgtables.exit_mmap
     56.87 ±218%     -99.7%       0.15 ±213%  perf-sched.wait_time.avg.ms.__cond_resched.__release_sock.release_sock.__inet_stream_connect.inet_stream_connect
     42.63 ±219%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__release_sock.release_sock.tcp_sendmsg.sock_write_iter
      0.19 ± 10%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.exit_mmap.mmput
      0.02 ± 57%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.relocate_vma_down.setup_arg_pages
      0.43 ± 44%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__vmalloc_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node.dup_task_struct
     13.79 ±  3%     +35.5%      18.69 ± 13%  perf-sched.wait_time.avg.ms.__cond_resched.__wait_for_common.affine_move_task.__set_cpus_allowed_ptr.__sched_setaffinity
      0.13 ± 15%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__x64_sys_vfork
     22.06 ±212%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.aa_sk_perm.security_socket_recvmsg.sock_recvmsg.sock_read_iter
      0.88 ± 44%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.aa_sk_perm.security_socket_sendmsg.sock_write_iter.vfs_write
      0.56 ±138%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.apparmor_file_alloc_security.security_file_alloc.init_file.alloc_empty_file
      0.68 ± 45%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.cgroup_css_set_fork.cgroup_can_fork.copy_process.kernel_clone
      0.05 ± 30%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.change_pud_range.isra.0.change_protection_range
      0.37 ± 20%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.copy_page_range.dup_mmap.dup_mm.constprop
      0.09 ± 45%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_read.acct_collect.do_exit.do_group_exit
      0.08 ± 24%     -96.6%       0.00 ±223%  perf-sched.wait_time.avg.ms.__cond_resched.down_read.walk_component.link_path_walk.part
      0.16 ±127%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_read.walk_component.path_lookupat.filename_lookup
      0.39 ± 18%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write.anon_vma_clone.anon_vma_fork.dup_mmap
      0.35 ± 19%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write.anon_vma_fork.dup_mmap.dup_mm
      0.20 ± 24%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write.free_pgtables.exit_mmap.mmput
      0.07 ± 38%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write.mmap_region.do_mmap.vm_mmap_pgoff
      1.74 ± 37%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write.shmem_file_write_iter.vfs_write.ksys_write
      0.10 ± 65%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write.unlink_anon_vmas.free_pgtables.exit_mmap
      0.06 ± 43%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write.vma_link_file.mmap_region.do_mmap
      0.11 ±150%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write.vma_prepare.__split_vma.vma_modify
      0.02 ±101%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write_killable.setup_arg_pages.load_elf_binary.search_binary_handler
      0.02 ± 52%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_binary
      0.04 ± 98%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_interp
      0.11 ± 46%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.ksys_mmap_pgoff.do_syscall_64
      0.80 ± 41%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.dput.do_linkat.__x64_sys_link.do_syscall_64
      0.87 ± 48%     -96.5%       0.03 ±223%  perf-sched.wait_time.avg.ms.__cond_resched.dput.open_last_lookups.path_openat.do_filp_open
      0.13 ±108%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.dput.path_put.exit_fs.do_exit
     46.90 ±158%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.dput.path_put.vfs_statx.vfs_fstatat
     44.86 ± 63%     -95.9%       1.84 ± 36%  perf-sched.wait_time.avg.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir
      0.06 ± 26%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.dput.step_into.link_path_walk.part
      0.12 ± 69%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat
      0.17 ±114%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.dput.step_into.path_lookupat.filename_lookup
      0.15 ± 89%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_lru_noprof.__d_alloc.d_alloc.d_alloc_parallel
     85.20 ±219%    -100.0%       0.00 ±223%  perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_node_noprof.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb
      0.65 ± 67%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_node_noprof.dup_task_struct.copy_process.kernel_clone
      0.37 ±  9%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.anon_vma_fork.dup_mmap.dup_mm
      0.72 ± 21%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.copy_fs_struct.copy_process.kernel_clone
      0.05 ± 98%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.commit_merge
      0.05 ± 52%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.mmap_region
      0.04 ± 59%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vma_modify
      0.08 ±114%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.mmput.exec_mmap.begin_new_exec.load_elf_binary
      1.03 ± 95%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.mnt_want_write.open_last_lookups.path_openat.do_filp_open
     20.87 ± 56%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.mutex_lock.perf_poll.do_poll.constprop
     33.59 ±222%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.mutex_lock.pipe_read.vfs_read.ksys_read
      0.24 ± 17%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.__percpu_counter_init_many.mm_init
      0.37 ± 13%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.dup_mm
      0.22 ± 52%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.put_files_struct.do_exit.do_group_exit.__x64_sys_exit_group
      0.14 ± 65%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.remove_vma.exit_mmap.mmput.exit_mm
      0.06 ±125%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.mmap_region.do_mmap
      1.67 ± 26%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.shmem_get_folio_gfp.shmem_file_read_iter.vfs_read.ksys_read
    146.47 ±213%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.shmem_get_folio_gfp.shmem_write_begin.generic_perform_write.shmem_file_write_iter
      1.79 ± 18%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.shmem_undo_range.shmem_evict_inode.evict.__dentry_kill
      0.01 ± 22%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part
      0.09 ± 26%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.super_lock.iterate_supers.ksys_sync.__x64_sys_sync
      0.44 ± 76%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.task_work_run.do_exit.do_group_exit.__x64_sys_exit_group
      0.09 ± 80%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.unmap_vmas.exit_mmap.mmput.exit_mm
      0.15 ± 65%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_complete_munmap_vmas.mmap_region
      0.37 ± 16%    -100.0%       0.00        perf-sched.wait_time.avg.ms.__cond_resched.uprobe_start_dup_mmap.dup_mmap.dup_mm.constprop
      0.02 ± 43%     -72.9%       0.01 ± 43%  perf-sched.wait_time.avg.ms.d_alloc_parallel.lookup_open.isra.0.open_last_lookups
    105.60 ±  3%     +16.2%     122.75 ±  3%  perf-sched.wait_time.avg.ms.devkmsg_read.vfs_read.ksys_read.do_syscall_64
      1.92 ±217%    -100.0%       0.00        perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_exc_page_fault.[unknown]
      0.13 ± 37%     -98.0%       0.00 ±223%  perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown]
      3.24 ±213%    -100.0%       0.00        perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown]
     10.51 ±  7%     +29.7%      13.63        perf-sched.wait_time.avg.ms.pipe_read.vfs_read.ksys_read.do_syscall_64
    424.06 ± 11%     +49.4%     633.75 ±  6%  perf-sched.wait_time.avg.ms.schedule_hrtimeout_range_clock.do_poll.constprop.0.do_sys_poll
     82.47 ± 68%     -74.9%      20.66 ± 89%  perf-sched.wait_time.avg.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.do_unlinkat
     75.52 ±  8%     +27.8%      96.53 ±  4%  perf-sched.wait_time.avg.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm
      0.05 ± 91%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.__handle_mm_fault
      0.64 ± 17%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.__pmd_alloc.copy_p4d_range
    670.16 ± 70%     -99.6%       2.98 ± 39%  perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.folio_alloc_mpol_noprof.vma_alloc_folio_noprof
      0.36 ± 50%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.get_free_pages_noprof.pgd_alloc
    167.66 ±222%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__alloc_pages_noprof.alloc_pages_mpol_noprof.pte_alloc_one.__pte_alloc
      0.50 ± 96%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__anon_vma_prepare.__vmf_anon_prepare.do_anonymous_page.__handle_mm_fault
      0.31 ± 74%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__dentry_kill.dput.step_into.link_path_walk
      2.62 ±  7%     -75.4%       0.64 ±140%  perf-sched.wait_time.max.ms.__cond_resched.__fput.__x64_sys_close.do_syscall_64.entry_SYSCALL_64_after_hwframe
      1.06 ± 88%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_cache_node_noprof.__get_vm_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node
      0.40 ± 52%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_cache_noprof.perf_event_mmap_event.perf_event_mmap.mmap_region
      0.03 ± 57%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_noprof.load_elf_phdrs.load_elf_binary.search_binary_handler
      0.90 ± 71%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_prepare_creds.prepare_creds
      1.61 ± 55%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__kmalloc_noprof.lsm_blob_alloc.security_task_alloc.copy_process
      0.47 ± 85%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__put_anon_vma.unlink_anon_vmas.free_pgtables.exit_mmap
    168.98 ±221%     -99.9%       0.15 ±213%  perf-sched.wait_time.max.ms.__cond_resched.__release_sock.release_sock.__inet_stream_connect.inet_stream_connect
    168.89 ±221%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__release_sock.release_sock.tcp_sendmsg.sock_write_iter
      1.66 ± 10%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.exit_mmap.mmput
      0.10 ±104%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.relocate_vma_down.setup_arg_pages
      2.96 ± 13%     -46.0%       1.60 ± 65%  perf-sched.wait_time.max.ms.__cond_resched.__tlb_batch_free_encoded_pages.tlb_finish_mmu.vms_clear_ptes.vms_complete_munmap_vmas
      1.12 ± 66%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__vmalloc_area_node.__vmalloc_node_range_noprof.alloc_thread_stack_node.dup_task_struct
    836.76 ± 44%     -99.8%       1.53 ± 69%  perf-sched.wait_time.max.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__do_sys_clone3
      3.03 ± 38%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.__wait_for_common.wait_for_completion_state.kernel_clone.__x64_sys_vfork
    169.67 ±220%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.aa_sk_perm.security_socket_recvmsg.sock_recvmsg.sock_read_iter
      2.16 ± 41%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.aa_sk_perm.security_socket_sendmsg.sock_write_iter.vfs_write
      1.28 ± 82%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.apparmor_file_alloc_security.security_file_alloc.init_file.alloc_empty_file
      2.09 ± 35%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.cgroup_css_set_fork.cgroup_can_fork.copy_process.kernel_clone
      0.31 ± 39%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.change_pud_range.isra.0.change_protection_range
      0.58 ± 29%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.copy_page_range.dup_mmap.dup_mm.constprop
      1.19 ± 49%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_read.acct_collect.do_exit.do_group_exit
      0.52 ± 41%     -99.5%       0.00 ±223%  perf-sched.wait_time.max.ms.__cond_resched.down_read.walk_component.link_path_walk.part
      0.55 ±138%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_read.walk_component.path_lookupat.filename_lookup
      1.11 ± 44%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write.anon_vma_clone.anon_vma_fork.dup_mmap
      0.76 ± 35%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write.anon_vma_fork.dup_mmap.dup_mm
      2.71 ± 10%     -72.7%       0.74 ±141%  perf-sched.wait_time.max.ms.__cond_resched.down_write.do_truncate.do_open.path_openat
      1.46 ± 31%     -92.3%       0.11 ±223%  perf-sched.wait_time.max.ms.__cond_resched.down_write.dup_mmap.dup_mm.constprop
      1.65 ± 20%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write.free_pgtables.exit_mmap.mmput
      0.22 ± 42%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write.mmap_region.do_mmap.vm_mmap_pgoff
      2.00 ± 33%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write.shmem_file_write_iter.vfs_write.ksys_write
      0.82 ± 51%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write.unlink_anon_vmas.free_pgtables.exit_mmap
      0.33 ± 50%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write.vma_link_file.mmap_region.do_mmap
      0.30 ±131%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write.vma_prepare.__split_vma.vma_modify
      0.76 ± 47%     -99.3%       0.01 ±223%  perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.exec_mmap.begin_new_exec.load_elf_binary
      0.11 ± 89%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.setup_arg_pages.load_elf_binary.search_binary_handler
      0.11 ± 80%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_binary
      0.18 ± 79%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.elf_load.load_elf_interp
      0.41 ± 44%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.down_write_killable.vm_mmap_pgoff.ksys_mmap_pgoff.do_syscall_64
      2.35 ± 44%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.dput.do_linkat.__x64_sys_link.do_syscall_64
      2.19 ± 41%     -98.6%       0.03 ±223%  perf-sched.wait_time.max.ms.__cond_resched.dput.open_last_lookups.path_openat.do_filp_open
      0.35 ±113%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.dput.path_put.exit_fs.do_exit
    335.13 ±140%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.dput.path_put.vfs_statx.vfs_fstatat
      1005           -99.7%       2.66 ± 20%  perf-sched.wait_time.max.ms.__cond_resched.dput.scan_positives.dcache_readdir.iterate_dir
    169.41 ±219%     -99.6%       0.74 ±149%  perf-sched.wait_time.max.ms.__cond_resched.dput.shmem_unlink.vfs_unlink.do_unlinkat
      0.41 ± 50%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.dput.step_into.link_path_walk.part
      0.23 ± 46%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.dput.step_into.open_last_lookups.path_openat
      0.87 ±117%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.dput.step_into.path_lookupat.filename_lookup
    335.93 ±140%     -99.5%       1.70 ± 70%  perf-sched.wait_time.max.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open
      1.72 ± 21%     -96.1%       0.07 ±223%  perf-sched.wait_time.max.ms.__cond_resched.exit_mmap.mmput.exit_mm.do_exit
      0.68 ±134%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_lru_noprof.__d_alloc.d_alloc.d_alloc_parallel
      2.47 ± 10%     -72.3%       0.68 ±142%  perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_lru_noprof.shmem_alloc_inode.alloc_inode.new_inode
    169.71 ±220%    -100.0%       0.00 ±223%  perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_node_noprof.__alloc_skb.alloc_skb_with_frags.sock_alloc_send_pskb
      1.46 ± 83%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_node_noprof.dup_task_struct.copy_process.kernel_clone
      2.32 ± 33%     -80.1%       0.46 ±174%  perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_empty_file.path_openat.do_filp_open
      1.89 ± 29%     -67.6%       0.61 ±141%  perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.alloc_pid.copy_process.kernel_clone
      1.75 ± 57%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.anon_vma_fork.dup_mmap.dup_mm
      2.20 ± 14%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.copy_fs_struct.copy_process.kernel_clone
      0.14 ± 63%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.commit_merge
      0.33 ± 70%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.mas_alloc_nodes.mas_preallocate.mmap_region
      2.22 ± 30%     -84.2%       0.35 ±223%  perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.security_inode_alloc.inode_init_always.alloc_inode
      0.19 ± 67%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.kmem_cache_alloc_noprof.vm_area_dup.__split_vma.vma_modify
      0.16 ±110%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.mmput.exec_mmap.begin_new_exec.load_elf_binary
    168.83 ±220%     -99.9%       0.23 ±223%  perf-sched.wait_time.max.ms.__cond_resched.mnt_want_write.do_unlinkat.__x64_sys_unlink.do_syscall_64
      1.30 ± 90%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.mnt_want_write.open_last_lookups.path_openat.do_filp_open
     49.02 ± 71%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.mutex_lock.perf_poll.do_poll.constprop
    167.38 ±223%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.mutex_lock.pipe_read.vfs_read.ksys_read
      1.06 ± 52%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.__percpu_counter_init_many.mm_init
      0.62 ± 38%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.mutex_lock_killable.pcpu_alloc_noprof.mm_init.dup_mm
      1.29 ± 28%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.put_files_struct.do_exit.do_group_exit.__x64_sys_exit_group
      0.76 ± 67%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.remove_vma.exit_mmap.mmput.exit_mm
      0.16 ± 75%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.remove_vma.vms_complete_munmap_vmas.mmap_region.do_mmap
      2.31 ± 10%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.shmem_get_folio_gfp.shmem_file_read_iter.vfs_read.ksys_read
    586.41 ±213%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.shmem_get_folio_gfp.shmem_write_begin.generic_perform_write.shmem_file_write_iter
      2.50 ±  9%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.shmem_undo_range.shmem_evict_inode.evict.__dentry_kill
      2375 ±  7%     -98.3%      41.33 ±148%  perf-sched.wait_time.max.ms.__cond_resched.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      1.97 ± 64%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.stop_one_cpu.sched_exec.bprm_execve.part
      0.34 ± 52%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.super_lock.iterate_supers.ksys_sync.__x64_sys_sync
      0.77 ± 60%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.task_work_run.do_exit.do_group_exit.__x64_sys_exit_group
      0.74 ± 79%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.unmap_vmas.exit_mmap.mmput.exit_mm
      0.37 ± 79%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.unmap_vmas.vms_clear_ptes.vms_complete_munmap_vmas.mmap_region
      1.06 ± 67%    -100.0%       0.00        perf-sched.wait_time.max.ms.__cond_resched.uprobe_start_dup_mmap.dup_mmap.dup_mm.constprop
      2.09 ± 24%     -63.9%       0.75 ±141%  perf-sched.wait_time.max.ms.__cond_resched.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.17 ± 35%     -74.1%       0.04 ±122%  perf-sched.wait_time.max.ms.d_alloc_parallel.__lookup_slow.walk_component.path_lookupat
      1796 ± 22%     -35.8%       1152 ± 25%  perf-sched.wait_time.max.ms.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.x64_sys_call
    167.97 ±222%    -100.0%       0.00        perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_exc_page_fault.[unknown]
      2200 ± 54%     -92.3%     169.14 ±221%  perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown]
      1.42 ± 67%     -99.8%       0.00 ±223%  perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown]
    170.12 ±219%     -99.8%       0.32 ±218%  perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown].[unknown]
    168.00 ±221%    -100.0%       0.00        perf-sched.wait_time.max.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown]
      0.52 ± 15%     -70.5%       0.15 ±145%  perf-sched.wait_time.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.dup_mmap
      2744 ±  3%     -36.5%       1741 ± 15%  perf-sched.wait_time.max.ms.smpboot_thread_fn.kthread.ret_from_fork.ret_from_fork_asm
      2864 ± 23%     -30.6%       1987 ± 10%  perf-sched.wait_time.max.ms.worker_thread.kthread.ret_from_fork.ret_from_fork_asm




Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime
Posted by Peter Zijlstra 1 month, 2 weeks ago
On Fri, Oct 11, 2024 at 02:24:49PM +0800, Xavier wrote:
> The current code subtracts the value of curr from avg_vruntime and avg_load
> during runtime. Then, every time avg_vruntime() is called, it adds the
> value of curr to the avg_vruntime and avg_load. Afterward, it divides these
> and adds min_vruntime to obtain the actual avg_vruntime.
> 
> Analysis of the code indicates that avg_vruntime only changes significantly
> during update_curr(), update_min_vruntime(), and when tasks are enqueued or
> dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
> only in these specific scenarios. This optimization ensures that accessing
> avg_vruntime() does not necessitate a recalculation each time, thereby
> enhancing the efficiency of the code.
> 
> There is no need to subtract curr’s load from avg_load during runtime.
> Instead, we only need to calculate the incremental change and update
> avg_vruntime whenever curr’s time is updated.
> 
> To better represent their functions, rename the original avg_vruntime and
> avg_load to tot_vruntime and tot_load, respectively, which more accurately
> describes their roles in the computation.
> 
> Signed-off-by: Xavier <xavier_qy@163.com>

This makes the code more complicated for no shown benefit.
Re: Re: [PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime
Posted by Xavier 1 month, 2 weeks ago



At 2024-10-11 16:52:01, "Peter Zijlstra" <peterz@infradead.org> wrote:
>On Fri, Oct 11, 2024 at 02:24:49PM +0800, Xavier wrote:
>> The current code subtracts the value of curr from avg_vruntime and avg_load
>> during runtime. Then, every time avg_vruntime() is called, it adds the
>> value of curr to the avg_vruntime and avg_load. Afterward, it divides these
>> and adds min_vruntime to obtain the actual avg_vruntime.
>> 
>> Analysis of the code indicates that avg_vruntime only changes significantly
>> during update_curr(), update_min_vruntime(), and when tasks are enqueued or
>> dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
>> only in these specific scenarios. This optimization ensures that accessing
>> avg_vruntime() does not necessitate a recalculation each time, thereby
>> enhancing the efficiency of the code.
>> 
>> There is no need to subtract curr’s load from avg_load during runtime.
>> Instead, we only need to calculate the incremental change and update
>> avg_vruntime whenever curr’s time is updated.
>> 
>> To better represent their functions, rename the original avg_vruntime and
>> avg_load to tot_vruntime and tot_load, respectively, which more accurately
>> describes their roles in the computation.
>> 
>> Signed-off-by: Xavier <xavier_qy@163.com>
>

>This makes the code more complicated for no shown benefit.




Hi Peter,

Thank you for reviewing this patch. I would like to address your concerns as follows:

Code Complexity vs. Understandability: I agree that this modification adds some
 complexity to the code, but the method of calculation is more straightforward.
 This patch maintains consistency in how avg_vruntime is added or subtracted
 relative to load. Specifically, the enqueue and dequeue operations of tasks directly
 impact the avg_vruntime of cfs_rq, which seems logical.

Efficiency Improvements: This approach minimizes unnecessary calculations,
 thereby enhancing execution efficiency. I understand that entity_eligible() and
 vruntime_eligible() are high-frequency operations. The existing code re-adds
 curr's contribution to cfs_rq->avg_vruntime on every eligibility check.
 If many tasks in the cfs_rq do not meet the conditions, it leads to multiple
 redundant calculations within pick_eevdf(). This patch resolves this issue by
 computing cfs_rq->tot_vruntime only when an update is necessary, allowing
 vruntime_eligible() to utilize the precomputed value directly.

Reducing avg_vruntime Calculations: This patch also reduces the frequency of
 avg_vruntime evaluations. The original code calls avg_vruntime() every time it's
 needed, despite many of those calls being redundant when curr->vruntime
 hasn't changed. This patch ensures that cfs_rq->avg_vruntime is updated only
 when curr->vruntime or cfs_rq->tot_vruntime changes, allowing subsequent
 calls to directly access the current value. This greatly decreases the frequency
 of avg_vruntime calculations.

I hope this explanation clarifies the benefits of the patch.
 I welcome any comments or suggestions. Thank you!
[PATCH v2] sched/eevdf: Reduce the computation frequency of avg_vruntime
Posted by Xavier 1 month, 2 weeks ago
From: xw357835 <xw357835@alibaba-inc.com>

The current code subtracts the value of curr from avg_vruntime and avg_load
during runtime. Then, every time avg_vruntime() is called, it adds the
value of curr to the avg_vruntime and avg_load. Afterward, it divides these
and adds min_vruntime to obtain the actual avg_vruntime.

Analysis of the code indicates that avg_vruntime only changes significantly
during update_curr(), update_min_vruntime(), and when tasks are enqueued or
dequeued. Therefore, it is sufficient to recalculate and store avg_vruntime
only in these specific scenarios. This optimization ensures that accessing
avg_vruntime() does not necessitate a recalculation each time, thereby
enhancing the efficiency of the code.

There is no need to subtract curr's load from avg_load during runtime.
Instead, we only need to calculate the incremental change and update
avg_vruntime whenever curr's time is updated.

To better represent their functions, rename the original avg_vruntime and
avg_load to tot_vruntime and tot_load, respectively, which more accurately
describes their roles in the computation.

Signed-off-by: xw357835 <xw357835@alibaba-inc.com>
---
Note:
The patch V2 has been updated based on the latest sched/core branch.

 kernel/sched/fair.c  | 107 ++++++++++++++++++++++++-------------------
 kernel/sched/sched.h |   3 +-
 2 files changed, 61 insertions(+), 49 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a621210c9c..fb0434dd0a8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -606,8 +606,8 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Which we track using:
  *
  *                    v0 := cfs_rq->min_vruntime
- * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
- *              \Sum w_i := cfs_rq->avg_load
+ * \Sum (v_i - v0) * w_i := cfs_rq->tot_vruntime
+ *              \Sum w_i := cfs_rq->tot_load
  *
  * Since min_vruntime is a monotonic increasing variable that closely tracks
  * the per-task service, these deltas: (v_i - v), will be in the order of the
@@ -617,14 +617,29 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  * As measured, the max (key * weight) value was ~44 bits for a kernel build.
  */
+static inline void avg_vruntime_update(struct cfs_rq *cfs_rq)
+{
+	s64	tot_vruntime = cfs_rq->tot_vruntime;
+
+	/* sign flips effective floor / ceiling */
+	if (cfs_rq->tot_load) {
+		if (tot_vruntime < 0)
+			tot_vruntime -= (cfs_rq->tot_load - 1);
+		cfs_rq->avg_vruntime = div_s64(tot_vruntime, cfs_rq->tot_load);
+	} else {
+		cfs_rq->avg_vruntime = cfs_rq->tot_vruntime;
+	}
+}
+
 static void
 avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	unsigned long weight = scale_load_down(se->load.weight);
 	s64 key = entity_key(cfs_rq, se);
 
-	cfs_rq->avg_vruntime += key * weight;
-	cfs_rq->avg_load += weight;
+	cfs_rq->tot_vruntime += key * weight;
+	cfs_rq->tot_load += weight;
+	avg_vruntime_update(cfs_rq);
 }
 
 static void
@@ -633,17 +648,29 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	unsigned long weight = scale_load_down(se->load.weight);
 	s64 key = entity_key(cfs_rq, se);
 
-	cfs_rq->avg_vruntime -= key * weight;
-	cfs_rq->avg_load -= weight;
+	cfs_rq->tot_vruntime -= key * weight;
+	cfs_rq->tot_load -= weight;
+	avg_vruntime_update(cfs_rq);
+}
+
+static inline
+void avg_vruntime_update_for_curr(struct cfs_rq *cfs_rq, s64 delta)
+{
+	struct sched_entity *curr = cfs_rq->curr;
+	unsigned long weight = scale_load_down(curr->load.weight);
+
+	cfs_rq->tot_vruntime += delta * weight;
+	avg_vruntime_update(cfs_rq);
 }
 
 static inline
-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+void avg_vruntime_update_for_minv(struct cfs_rq *cfs_rq, s64 delta)
 {
 	/*
-	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+	 * v' = v + d ==> tot_vruntime' = tot_vruntime - d*tot_load
 	 */
-	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+	cfs_rq->tot_vruntime -= cfs_rq->tot_load * delta;
+	avg_vruntime_update(cfs_rq);
 }
 
 /*
@@ -652,25 +679,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
  */
 u64 avg_vruntime(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *curr = cfs_rq->curr;
-	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
-
-	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
-
-		avg += entity_key(cfs_rq, curr) * weight;
-		load += weight;
-	}
-
-	if (load) {
-		/* sign flips effective floor / ceiling */
-		if (avg < 0)
-			avg -= (load - 1);
-		avg = div_s64(avg, load);
-	}
-
-	return cfs_rq->min_vruntime + avg;
+	return cfs_rq->min_vruntime + cfs_rq->avg_vruntime;
 }
 
 /*
@@ -725,18 +734,8 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
  */
 static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
 {
-	struct sched_entity *curr = cfs_rq->curr;
-	s64 avg = cfs_rq->avg_vruntime;
-	long load = cfs_rq->avg_load;
-
-	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
-
-		avg += entity_key(cfs_rq, curr) * weight;
-		load += weight;
-	}
-
-	return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+	return cfs_rq->tot_vruntime >=
+			(s64)(vruntime - cfs_rq->min_vruntime) * (s64)cfs_rq->tot_load;
 }
 
 int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -752,7 +751,7 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
 	 */
 	s64 delta = (s64)(vruntime - min_vruntime);
 	if (delta > 0) {
-		avg_vruntime_update(cfs_rq, delta);
+		avg_vruntime_update_for_minv(cfs_rq, delta);
 		min_vruntime = vruntime;
 	}
 	return min_vruntime;
@@ -851,7 +850,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
  */
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	avg_vruntime_add(cfs_rq, se);
 	se->min_vruntime = se->vruntime;
 	se->min_slice = se->slice;
 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -862,7 +860,6 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
 				  &min_vruntime_cb);
-	avg_vruntime_sub(cfs_rq, se);
 }
 
 struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1219,6 +1216,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	s64 delta_exec;
 	bool resched;
+	s64 vdelta_exec;
 
 	if (unlikely(!curr))
 		return;
@@ -1227,8 +1225,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (unlikely(delta_exec <= 0))
 		return;
 
-	curr->vruntime += calc_delta_fair(delta_exec, curr);
+	vdelta_exec = calc_delta_fair(delta_exec, curr);
+	curr->vruntime += vdelta_exec;
 	resched = update_deadline(cfs_rq, curr);
+	avg_vruntime_update_for_curr(cfs_rq, vdelta_exec);
 	update_min_vruntime(cfs_rq);
 
 	if (entity_is_task(curr)) {
@@ -3883,6 +3883,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		avruntime = avg_vruntime(cfs_rq);
 		if (!curr)
 			__dequeue_entity(cfs_rq, se);
+
+		avg_vruntime_sub(cfs_rq, se);
 		update_load_sub(&cfs_rq->load, se->load.weight);
 	}
 	dequeue_load_avg(cfs_rq, se);
@@ -3913,6 +3915,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		if (!curr)
 			__enqueue_entity(cfs_rq, se);
 
+		avg_vruntime_add(cfs_rq, se);
+
 		/*
 		 * The entity's vruntime has been adjusted, so let's check
 		 * whether the rq-wide min_vruntime needs updated too. Since
@@ -5281,7 +5285,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * EEVDF: placement strategy #1 / #2
 	 */
 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
-		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
 
 		lag = se->vlag;
@@ -5338,9 +5341,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		 *
 		 *   vl_i = (W + w_i)*vl'_i / W
 		 */
-		load = cfs_rq->avg_load;
-		if (curr && curr->on_rq)
-			load += scale_load_down(curr->load.weight);
+		load = cfs_rq->tot_load;
 
 		lag *= load + scale_load_down(se->load.weight);
 		if (WARN_ON_ONCE(!load))
@@ -5427,6 +5428,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_stats_enqueue_fair(cfs_rq, se, flags);
 	if (!curr)
 		__enqueue_entity(cfs_rq, se);
+
+	avg_vruntime_add(cfs_rq, se);
 	se->on_rq = 1;
 
 	if (cfs_rq->nr_running == 1) {
@@ -5530,6 +5533,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
+
+	avg_vruntime_sub(cfs_rq, se);
 	se->on_rq = 0;
 	account_entity_dequeue(cfs_rq, se);
 
@@ -6924,11 +6929,17 @@ requeue_delayed_entity(struct sched_entity *se)
 			cfs_rq->nr_running--;
 			if (se != cfs_rq->curr)
 				__dequeue_entity(cfs_rq, se);
+			avg_vruntime_sub(cfs_rq, se);
+
 			se->vlag = 0;
 			place_entity(cfs_rq, se, 0);
+
 			if (se != cfs_rq->curr)
 				__enqueue_entity(cfs_rq, se);
+			avg_vruntime_add(cfs_rq, se);
 			cfs_rq->nr_running++;
+
+			update_min_vruntime(cfs_rq);
 		}
 	}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b1c3588a8f0..7f7c93518c7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,8 +650,9 @@ struct cfs_rq {
 	unsigned int		idle_nr_running;   /* SCHED_IDLE */
 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
+	s64			tot_vruntime;
 	s64			avg_vruntime;
-	u64			avg_load;
+	u64			tot_load;
 
 	u64			min_vruntime;
 #ifdef CONFIG_SCHED_CORE
-- 
2.45.2