[RFC PATCH 06/22] sched/fair: Propagate the min_vruntime of kernel mode preempted entity

Propagate the min_vruntime of the kernel mode preempted entity to the
root of the cfs_rq's rbtree. This will soon be used to pick amongst the
kernel mode entities on a throttled hierarchy using a min-heap approach
similar to the one that pick_eevdf() currently implements.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
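Not part of the patch: for review convenience, below is a standalone toy
model of the augmented-tree propagation added here. Every node caches the
minimum kernel mode vruntime found in its subtree, with LLONG_MAX meaning
"nothing queued", and an update walks towards the root until a node's
cached value stops changing, which is the stop condition the
RB_DECLARE_CALLBACKS() machinery relies on. All names are hypothetical
and this uses a plain binary tree rather than an rbtree.

#include <stdio.h>
#include <limits.h>
#include <stdbool.h>

/*
 * Toy stand-in for a queued sched_entity: every node caches the
 * minimum kernel mode vruntime found in its subtree, with LLONG_MAX
 * meaning no kernel mode preempted entity is queued below it.
 */
struct toy_node {
	long long vruntime;
	bool in_kernel;
	long long min_kcs_vruntime;
	struct toy_node *left, *right;
};

/* Mirrors min_kcs_vruntime_update(): true if the cached value is unchanged. */
static bool toy_min_update(struct toy_node *n)
{
	long long old = n->min_kcs_vruntime;

	n->min_kcs_vruntime = n->in_kernel ? n->vruntime : LLONG_MAX;
	if (n->left && n->left->min_kcs_vruntime < n->min_kcs_vruntime)
		n->min_kcs_vruntime = n->left->min_kcs_vruntime;
	if (n->right && n->right->min_kcs_vruntime < n->min_kcs_vruntime)
		n->min_kcs_vruntime = n->right->min_kcs_vruntime;

	return n->min_kcs_vruntime == old;
}

/* Propagate from the deepest node towards the root; stop once nothing changes. */
static void toy_propagate(struct toy_node **path, int depth)
{
	while (depth-- > 0 && !toy_min_update(path[depth]))
		;
}

int main(void)
{
	/* A negative vruntime, which EEVDF allows for lagging entities. */
	struct toy_node leaf = { .vruntime = -5, .in_kernel = true };
	struct toy_node root = { .vruntime = 10, .in_kernel = false, .left = &leaf };
	struct toy_node *path[] = { &root, &leaf };

	toy_propagate(path, 2);
	printf("min kcs vruntime at root: %lld\n", root.min_kcs_vruntime);
	return 0;
}
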
 include/linux/sched.h |  6 ++++++
 kernel/sched/fair.c   | 47 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 63f3f235a5c1..4bb7e45758f4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -593,6 +593,12 @@ struct sched_entity {
 	 */
 	int				kernel_cs_count;
 					/* hole */
+
+	/*
+	 * min_vruntime of the kernel mode preempted entities
+	 * in the subtree of this sched entity.
+	 */
+	s64				min_kcs_vruntime;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cbb7a227afe7..ba1bd60ce433 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -828,6 +828,9 @@ static inline void __min_slice_update(struct sched_entity *se, struct rb_node *n
 	}
 }
 
+static __always_inline void init_se_kcs_stats(struct sched_entity *se);
+static inline bool min_kcs_vruntime_update(struct sched_entity *se);
+
 /*
  * se->min_vruntime = min(se->vruntime, {left,right}->min_vruntime)
  */
@@ -836,6 +839,7 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit)
 	u64 old_min_vruntime = se->min_vruntime;
 	u64 old_min_slice = se->min_slice;
 	struct rb_node *node = &se->run_node;
+	bool kcs_stats_unchanged = min_kcs_vruntime_update(se);
 
 	se->min_vruntime = se->vruntime;
 	__min_vruntime_update(se, node->rb_right);
@@ -846,7 +850,8 @@ static inline bool min_vruntime_update(struct sched_entity *se, bool exit)
 	__min_slice_update(se, node->rb_left);
 
 	return se->min_vruntime == old_min_vruntime &&
-	       se->min_slice == old_min_slice;
+	       se->min_slice == old_min_slice &&
+	       kcs_stats_unchanged;
 }
 
 RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
@@ -858,6 +863,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	avg_vruntime_add(cfs_rq, se);
+	init_se_kcs_stats(se);
 	se->min_vruntime = se->vruntime;
 	se->min_slice = se->slice;
 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -6778,6 +6784,39 @@ static __always_inline void avg_kcs_vruntime_update(struct cfs_rq *cfs_rq, s64 d
 	cfs_rq->avg_kcs_vruntime -= cfs_rq->avg_kcs_load * delta;
 }
 
+static __always_inline void init_se_kcs_stats(struct sched_entity *se)
+{
+	/*
+	 * With the introduction of EEVDF, an entity's vruntime can go negative
+	 * when a lagging entity joins a runqueue with avg_vruntime < vlag. Use
+	 * LLONG_MAX as the upper bound to differentiate the case where no
+	 * kernel mode preempted entities are queued in the subtree.
+	 */
+	se->min_kcs_vruntime = se_in_kernel(se) ? se->vruntime : LLONG_MAX;
+}
+
+static inline void __min_kcs_vruntime_update(struct sched_entity *se, struct rb_node *node)
+{
+	if (node) {
+		struct sched_entity *rse = __node_2_se(node);
+
+		if (rse->min_kcs_vruntime < se->min_kcs_vruntime)
+			se->min_kcs_vruntime = rse->min_kcs_vruntime;
+	}
+}
+
+static inline bool min_kcs_vruntime_update(struct sched_entity *se)
+{
+	s64 old_min_kcs_vruntime = se->min_kcs_vruntime;
+	struct rb_node *node = &se->run_node;
+
+	init_se_kcs_stats(se);
+	__min_kcs_vruntime_update(se, node->rb_right);
+	__min_kcs_vruntime_update(se, node->rb_left);
+
+	return se->min_kcs_vruntime == old_min_kcs_vruntime;
+}
+
 #ifdef CONFIG_NO_HZ_FULL
 /* called from pick_next_task_fair() */
 static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
@@ -6853,6 +6892,12 @@ __always_inline void sched_notify_critical_section_exit(void) {}
 static __always_inline void avg_kcs_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static __always_inline void avg_kcs_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static __always_inline void avg_kcs_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) {}
+static __always_inline void init_se_kcs_stats(struct sched_entity *se) {}
+
+static inline bool min_kcs_vruntime_update(struct sched_entity *se)
+{
+	return true;
+}
 
 #endif /* CONFIG_CFS_BANDWIDTH */
 
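Not part of the patch either: a rough sketch of the kind of min-heap walk
the changelog alludes to, mirroring how pick_eevdf() prunes with its own
augmented fields. The names are hypothetical, it reuses the toy node from
the note above the diffstat, and the actual pick logic only arrives later
in the series; the point is merely that the augmentation added here makes
such a bounded walk possible.

#include <stddef.h>
#include <limits.h>
#include <stdbool.h>

/* Same toy node as in the note above the diffstat. */
struct toy_node {
	long long vruntime;
	bool in_kernel;
	long long min_kcs_vruntime;
	struct toy_node *left, *right;
};

/*
 * Return a queued node holding the smallest kernel mode vruntime, or
 * NULL when none is queued (the root's cached minimum is LLONG_MAX).
 * Each step descends into the one subtree whose cached minimum can
 * still match the target, so the walk costs O(tree height).
 */
static struct toy_node *toy_pick_min_kcs(struct toy_node *node)
{
	long long target;

	if (!node || node->min_kcs_vruntime == LLONG_MAX)
		return NULL;

	target = node->min_kcs_vruntime;
	while (node) {
		if (node->in_kernel && node->vruntime == target)
			return node;
		if (node->left && node->left->min_kcs_vruntime == target)
			node = node->left;
		else
			node = node->right;
	}
	return NULL;	/* unreachable while the cached minima are consistent */
}
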
-- 
2.43.0