[RFC PATCH 05/22] sched/fair: Track EEVDF stats for entities preempted in kernel mode

With throttle deferral, throttled hierarchies will only need to pick
among the kernel mode preempted entities queued on them.

Track the EEVDF stats of kernel mode preempted entities in
avg_kcs_vruntime and avg_kcs_load. These mirror avg_vruntime and
avg_load respectively, but only account for the kernel mode preempted
entities queued on the rbtree.

Since all the eligibility checks are entity_key() based, also update
avg_kcs_vruntime when the min_vruntime of the cfs_rq changes.
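
As an illustration of how these stats could be consumed (not part of
this patch), a weighted average vruntime over only the kernel mode
preempted entities can be derived the same way avg_vruntime() derives
it for all queued entities. The helper name avg_kcs_vruntime_eval()
below is made up for this sketch, and the contribution of cfs_rq->curr
is left out for brevity:

	/* Sketch only: mirrors avg_vruntime(), ignoring cfs_rq->curr */
	static u64 avg_kcs_vruntime_eval(struct cfs_rq *cfs_rq)
	{
		s64 avg = cfs_rq->avg_kcs_vruntime;
		long load = cfs_rq->avg_kcs_load;

		if (load) {
			/* Negative sums must round towards negative infinity */
			if (avg < 0)
				avg -= (load - 1);
			avg = div_s64(avg, load);
		}

		/* Keys are relative to min_vruntime; add it back */
		return cfs_rq->min_vruntime + avg;
	}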

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
 kernel/sched/fair.c  | 62 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |  6 +++++
 2 files changed, 68 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index becf2d35f35a..cbb7a227afe7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -523,6 +523,9 @@ static int se_is_idle(struct sched_entity *se)
 
 static __always_inline
 void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec);
+static __always_inline void avg_kcs_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se);
+static __always_inline void avg_kcs_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se);
+static __always_inline void avg_kcs_vruntime_update(struct cfs_rq *cfs_rq, s64 delta);
 
 /**************************************************************
  * Scheduling class tree data structure manipulation methods:
@@ -630,6 +633,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 	cfs_rq->avg_vruntime += key * weight;
 	cfs_rq->avg_load += weight;
+	avg_kcs_vruntime_add(cfs_rq, se);
 }
 
 static void
@@ -640,6 +644,7 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 	cfs_rq->avg_vruntime -= key * weight;
 	cfs_rq->avg_load -= weight;
+	avg_kcs_vruntime_sub(cfs_rq, se);
 }
 
 static inline
@@ -649,6 +654,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
 	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
 	 */
 	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+	avg_kcs_vruntime_update(cfs_rq, delta);
 }
 
 /*
@@ -6720,6 +6726,58 @@ __always_inline void sched_notify_critical_section_exit(void)
 	current->se.kernel_cs_count--;
 }
 
+static inline int se_in_kernel(struct sched_entity *se)
+{
+	return se->kernel_cs_count;
+}
+
+/*
+ * Same as avg_vruntime_add() except avg_kcs_vruntime_add() only adjusts avg_kcs_vruntime
+ * and avg_kcs_load when a kernel mode preempted entity joins the rbtree.
+ */
+static __always_inline void avg_kcs_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	unsigned long weight;
+	s64 key;
+
+	if (!se_in_kernel(se))
+		return;
+
+	weight = scale_load_down(se->load.weight);
+	key = entity_key(cfs_rq, se);
+
+	cfs_rq->avg_kcs_vruntime += key * weight;
+	cfs_rq->avg_kcs_load += weight;
+}
+
+/*
+ * Same as avg_vruntime_sub() except avg_kcs_vruntime_sub() only adjusts avg_kcs_vruntime
+ * and avg_kcs_load when a kernel mode preempted entity leaves the rbtree.
+ */
+static __always_inline void avg_kcs_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	unsigned long weight;
+	s64 key;
+
+	if (!se_in_kernel(se))
+		return;
+
+	weight = scale_load_down(se->load.weight);
+	key = entity_key(cfs_rq, se);
+
+	cfs_rq->avg_kcs_vruntime -= key * weight;
+	cfs_rq->avg_kcs_load -= weight;
+}
+
+/*
+ * Same as avg_vruntime_update() except it adjusts avg_kcs_vruntime based on avg_kcs_load
+ * when min_vruntime of the cfs_rq changes.
+ */
+static __always_inline void avg_kcs_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+{
+	cfs_rq->avg_kcs_vruntime -= cfs_rq->avg_kcs_load * delta;
+}
+
 #ifdef CONFIG_NO_HZ_FULL
 /* called from pick_next_task_fair() */
 static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
@@ -6792,6 +6850,10 @@ bool cfs_task_bw_constrained(struct task_struct *p)
 __always_inline void sched_notify_critical_section_entry(void) {}
 __always_inline void sched_notify_critical_section_exit(void) {}
 
+static __always_inline void avg_kcs_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+static __always_inline void avg_kcs_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+static __always_inline void avg_kcs_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) {}
+
 #endif /* CONFIG_CFS_BANDWIDTH */
 
 #if !defined(CONFIG_CFS_BANDWIDTH) || !defined(CONFIG_NO_HZ_FULL)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ab16d3d0e51c..22567d236f82 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -658,6 +658,12 @@ struct cfs_rq {
 	s64			avg_vruntime;
 	u64			avg_load;
 
+#ifdef CONFIG_CFS_BANDWIDTH
+	/* EEVDF stats of entities preempted in kernel mode */
+	s64			avg_kcs_vruntime;
+	u64			avg_kcs_load;
+#endif
+
 	u64			min_vruntime;
 #ifdef CONFIG_SCHED_CORE
 	unsigned int		forceidle_seq;
-- 
2.43.0