[PATCH v3 06/21] sched/cache: Track LLC-preferred tasks per runqueue

Tim Chen posted 21 patches 1 month, 2 weeks ago
[PATCH v3 06/21] sched/cache: Track LLC-preferred tasks per runqueue
Posted by Tim Chen 1 month, 2 weeks ago
For each runqueue, track the number of tasks with an LLC preference
and how many of them are running on their preferred LLC. This mirrors
nr_numa_running and nr_preferred_running for NUMA balancing, and will
be used by cache-aware load balancing in later patches.

Co-developed-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---

Notes:
    v2->v3:
    Remove the sched_cache_enabled() check and make
    account_llc_{en,de}queue() depend on CONFIG_SCHED_CACHE,
    so that sched_llc_active from v2 can be removed.
    (Peter Zijlstra)

 kernel/sched/core.c  |  5 +++++
 kernel/sched/fair.c  | 48 +++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h |  6 ++++++
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c6efa71cf500..c464e370576f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -532,6 +532,11 @@ void __trace_set_current_state(int state_value)
 }
 EXPORT_SYMBOL(__trace_set_current_state);
 
+int task_llc(const struct task_struct *p)
+{
+	return per_cpu(sd_llc_id, task_cpu(p)); /* sd_llc_id of task_cpu(p) */
+}
+
 /*
  * Serialization rules:
  *
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0b4ed0f2809d..6ad9ad2f918f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1199,6 +1199,30 @@ static int llc_id(int cpu)
 	return per_cpu(sd_llc_id, cpu);
 }
 
+/* Count @p in @rq's LLC counters; a negative preferred_llc is ignored. */
+static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
+{
+	const int want = p->preferred_llc;
+
+	if (want >= 0) {
+		rq->nr_llc_running += 1;
+		if (want == task_llc(p))
+			rq->nr_pref_llc_running += 1;
+	}
+}
+
+/* Mirror of account_llc_enqueue(): drop @p from @rq's LLC counters. */
+static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
+{
+	const int want = p->preferred_llc;
+
+	if (want >= 0) {
+		rq->nr_llc_running -= 1;
+		if (want == task_llc(p))
+			rq->nr_pref_llc_running -= 1;
+	}
+}
+
 void mm_init_sched(struct mm_struct *mm,
 		   struct sched_cache_time __percpu *_pcpu_sched)
 {
@@ -1304,6 +1328,8 @@ static int get_pref_llc(struct task_struct *p, struct mm_struct *mm)
 	return mm_sched_llc;
 }
 
+static unsigned int task_running_on_cpu(int cpu, struct task_struct *p);
+
 static inline
 void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
 {
@@ -1346,8 +1372,13 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
 
 	mm_sched_llc = get_pref_llc(p, mm);
 
-	if (p->preferred_llc != mm_sched_llc)
+	/* task not on rq accounted later in account_entity_enqueue() */
+	if (task_running_on_cpu(rq->cpu, p) &&
+	    p->preferred_llc != mm_sched_llc) {
+		account_llc_dequeue(rq, p);
 		p->preferred_llc = mm_sched_llc;
+		account_llc_enqueue(rq, p);
+	}
 }
 
 static void task_tick_cache(struct rq *rq, struct task_struct *p)
@@ -1482,6 +1513,11 @@ static inline int get_pref_llc(struct task_struct *p,
 {
 	return -1;
 }
+
+/* No-op stubs: nothing to account when LLC tracking is compiled out. */
+static inline void account_llc_enqueue(struct rq *rq, struct task_struct *p) {}
+static inline void account_llc_dequeue(struct rq *rq, struct task_struct *p) {}
+
 #endif
 
 /*
@@ -3970,9 +4006,11 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_add(&cfs_rq->load, se->load.weight);
 	if (entity_is_task(se)) {
+		struct task_struct *p = task_of(se);
 		struct rq *rq = rq_of(cfs_rq);
 
-		account_numa_enqueue(rq, task_of(se));
+		account_numa_enqueue(rq, p);
+		account_llc_enqueue(rq, p);
 		list_add(&se->group_node, &rq->cfs_tasks);
 	}
 	cfs_rq->nr_queued++;
@@ -3983,7 +4021,11 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	if (entity_is_task(se)) {
-		account_numa_dequeue(rq_of(cfs_rq), task_of(se));
+		struct task_struct *p = task_of(se);
+		struct rq *rq = rq_of(cfs_rq);
+
+		account_numa_dequeue(rq, p);
+		account_llc_dequeue(rq, p);
 		list_del_init(&se->group_node);
 	}
 	cfs_rq->nr_queued--;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index de5b701c3950..35cea6aa32a4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1128,6 +1128,10 @@ struct rq {
 	unsigned int		nr_preferred_running;
 	unsigned int		numa_migrate_on;
 #endif
+#ifdef CONFIG_SCHED_CACHE
+	unsigned int		nr_pref_llc_running;
+	unsigned int		nr_llc_running;
+#endif
 #ifdef CONFIG_NO_HZ_COMMON
 	unsigned long		last_blocked_load_update_tick;
 	unsigned int		has_blocked_load;
@@ -1996,6 +2000,8 @@ init_numa_balancing(u64 clone_flags, struct task_struct *p)
 
 #endif /* !CONFIG_NUMA_BALANCING */
 
+int task_llc(const struct task_struct *p);
+
 static inline void
 queue_balance_callback(struct rq *rq,
 		       struct balance_callback *head,
-- 
2.32.0