[RFC PATCH v4 22/28] sched/deadline: Introduce dl_server_try_pull_f

Yuri Andriaccio posted 28 patches 8 hours ago
[RFC PATCH v4 22/28] sched/deadline: Introduce dl_server_try_pull_f
Posted by Yuri Andriaccio 8 hours ago
Introduce a new deadline server callback, dl_server_try_pull_f, which
attempts to pull tasks from other runqueues and returns true if, after
this operation, the runqueue is not empty.
This function is needed by some scheduling algorithms to guarantee that
they are work-conserving (i.e. whenever there is an idle CPU and a ready
task, this must be immediately scheduled there) or to enforce some other
properties of the scheduling algorithm used on the served runqueue (for
example, for a fixed-priority scheduler the m highest-priority tasks must
be scheduled).

The function is called whenever the dl_server_timer (the runtime
replenishment timer) expires and the deadline server is recharged.
The idea behind this callback is that since the deadline server is being
unthrottled and is becoming able to serve its runqueue, it should
pull tasks from the other runqueues (if there are no runnable tasks on its
own runqueue, or if the tasks on its runqueue have low priority).

The function takes a pointer to a sched_dl_entity (the deadline server)
and is expected to return true if there are runnable tasks on that
server (possibly just pulled from other runqueues), false otherwise.

If there are no runnable tasks for a given server, the replenishment
timer callback replenishes its bandwidth and then stops the server.

This callback is not relevant for fair deadline servers.

This fixes the test case in which a single hog process, in a cgroup with
a 10ms/100ms reservation, runs (without this patch) on only one CPU,
whereas (with this patch), by definition of the global Fixed Priority
scheduling algorithm, it must run on all the CPUs of the machine (up to
a utilization of 100ms/100ms).

Co-developed-by: luca abeni <luca.abeni@santannapisa.it>
Signed-off-by: luca abeni <luca.abeni@santannapisa.it>
Signed-off-by: Yuri Andriaccio <yurand2000@gmail.com>
---
 include/linux/sched.h   |  3 ++-
 kernel/sched/deadline.c | 12 ++++++++++++
 kernel/sched/fair.c     |  8 +++++++-
 kernel/sched/rt.c       | 13 ++++++++++++-
 kernel/sched/sched.h    |  6 ++++++
 5 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9ef7797983..62b8586d4f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -633,7 +633,7 @@ struct sched_rt_entity {
 #endif
 } __randomize_layout;
 
-typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *);
+typedef bool (*dl_server_try_pull_f)(struct sched_dl_entity *);
 typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *);
 
 struct sched_dl_entity {
@@ -734,6 +734,7 @@ struct sched_dl_entity {
 	struct dl_rq			*dl_rq;
 	struct rq			*my_q;
 	dl_server_pick_f		server_pick_task;
+	dl_server_try_pull_f		server_try_pull_task;
 
 #ifdef CONFIG_RT_MUTEXES
 	/*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 082bccc30b..a588fe3bbf 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1295,6 +1295,7 @@ static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
 static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
 {
 	struct rq *rq = rq_of_dl_se(dl_se);
+	bool is_active;
 	u64 fw;
 
 	scoped_guard (rq_lock, rq) {
@@ -1309,6 +1310,15 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
 		if (!dl_se->dl_runtime)
 			return HRTIMER_NORESTART;
 
+		rq_unpin_lock(rq, rf);
+		is_active = dl_se->server_try_pull_task(dl_se);
+		rq_repin_lock(rq, rf);
+		if (!is_active) {
+			replenish_dl_entity(dl_se);
+			dl_server_stop(dl_se);
+			return HRTIMER_NORESTART;
+		}
+
 		if (dl_se->dl_defer_armed) {
 			/*
 			 * First check if the server could consume runtime in background.
@@ -1712,10 +1722,12 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
 
 void dl_server_init(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq,
 		    struct rq *served_rq,
+		    dl_server_try_pull_f try_pull_task,
 		    dl_server_pick_f pick_task)
 {
 	dl_se->dl_rq = dl_rq;
 	dl_se->my_q  = served_rq;
+	dl_se->server_try_pull_task = try_pull_task;
 	dl_se->server_pick_task = pick_task;
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9c724d8232..dad46f6bd4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8957,6 +8957,11 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
 	return pick_next_task_fair(rq, prev, NULL);
 }
 
+static bool fair_server_try_pull_task(struct sched_dl_entity *dl_se)
+{
+	return true;
+}
+
 static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
 {
 	return pick_task_fair(dl_se->my_q);
@@ -8968,7 +8973,8 @@ void fair_server_init(struct rq *rq)
 
 	init_dl_entity(dl_se);
 
-	dl_server_init(dl_se, &rq->dl, rq, fair_server_pick_task);
+	dl_server_init(dl_se, &rq->dl, rq,
+		       fair_server_try_pull_task, fair_server_pick_task);
 }
 
 /*
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e2b67f8309..80580b48ab 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -134,6 +134,16 @@ void free_rt_sched_group(struct task_group *tg)
 static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq);
 static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first);
 
+static bool rt_server_try_pull(struct sched_dl_entity *dl_se)
+{
+	struct rt_rq *rt_rq = &dl_se->my_q->rt;
+
+	if (dl_se->my_q->rt.rt_nr_running == 0)
+		group_pull_rt_task(rt_rq);
+
+	return dl_se->my_q->rt.rt_nr_running > 0;
+}
+
 static struct task_struct *rt_server_pick(struct sched_dl_entity *dl_se)
 {
 	struct rt_rq *rt_rq = &dl_se->my_q->rt;
@@ -235,7 +245,8 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 		dl_se->dl_density = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
 		dl_se->dl_server = 1;
 
-		dl_server_init(dl_se, &cpu_rq(i)->dl, s_rq, rt_server_pick);
+		dl_server_init(dl_se, &cpu_rq(i)->dl, s_rq,
+			       rt_server_try_pull, rt_server_pick);
 	}
 
 	return 1;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8eac719eb..c069f6fef0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -363,6 +363,11 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
  *
  *   dl_se::rq -- runqueue we belong to.
  *
+ *   dl_se::server_try_pull() -- used on bandwidth enforcement; the server has a
+ *				 chance to pull tasks from the other runqueues,
+ *				 otherwise it is stopped if there is no task to
+ *				 run.
+ *
  *   dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
  *                           returns NULL.
  *
@@ -408,6 +413,7 @@ extern void dl_server_start(struct sched_dl_entity *dl_se);
 extern void dl_server_stop(struct sched_dl_entity *dl_se);
 extern void dl_server_init(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq,
 		    struct rq *served_rq,
+		    dl_server_try_pull_f try_pull_task,
 		    dl_server_pick_f pick_task);
 extern void sched_init_dl_servers(void);
 extern int dl_check_tg(unsigned long total);
-- 
2.51.0