[PATCH V6 1/6] sched/fair: Add trivial fair server

Daniel Bristot de Oliveira posted 6 patches 1 year, 10 months ago
There is a newer version of this series
[PATCH V6 1/6] sched/fair: Add trivial fair server
Posted by Daniel Bristot de Oliveira 1 year, 10 months ago
From: Peter Zijlstra <peterz@infradead.org>

Use deadline servers to service fair tasks.

This patch adds a fair_server deadline entity which acts as a container
for fair entities and can be used to fix starvation when higher priority
(wrt fair) tasks are monopolizing CPU(s).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
---
 kernel/sched/core.c     | 24 ++++++++++++++++--------
 kernel/sched/deadline.c | 23 +++++++++++++++++++++++
 kernel/sched/fair.c     | 25 +++++++++++++++++++++++++
 kernel/sched/sched.h    |  4 ++++
 4 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7019a40457a6..04e2270487b7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6007,6 +6007,14 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
 #endif
 
 	put_prev_task(rq, prev);
+
+	/*
+	 * We've updated @prev and no longer need the server link, clear it.
+	 * Must be done before ->pick_next_task() because that can (re)set
+	 * ->dl_server.
+	 */
+	if (prev->dl_server)
+		prev->dl_server = NULL;
 }
 
 /*
@@ -6037,6 +6045,13 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 			p = pick_next_task_idle(rq);
 		}
 
+		/*
+		 * This is a normal CFS pick, but the previous could be a DL pick.
+		 * Clear it as previous is no longer picked.
+		 */
+		if (prev->dl_server)
+			prev->dl_server = NULL;
+
 		/*
 		 * This is the fast path; it cannot be a DL server pick;
 		 * therefore even if @p == @prev, ->dl_server must be NULL.
@@ -6050,14 +6065,6 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 restart:
 	put_prev_task_balance(rq, prev, rf);
 
-	/*
-	 * We've updated @prev and no longer need the server link, clear it.
-	 * Must be done before ->pick_next_task() because that can (re)set
-	 * ->dl_server.
-	 */
-	if (prev->dl_server)
-		prev->dl_server = NULL;
-
 	for_each_class(class) {
 		p = class->pick_next_task(rq);
 		if (p)
@@ -10051,6 +10058,7 @@ void __init sched_init(void)
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
+		fair_server_init(rq);
 
 #ifdef CONFIG_SCHED_CORE
 		rq->core = rq;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a04a436af8cc..db5dc5c09106 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1382,6 +1382,13 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
 			resched_curr(rq);
 	}
 
+	/*
+	 * The fair server (sole dl_server) does not account for real-time
+	 * workload because it is running fair work.
+	 */
+	if (dl_se == &rq->fair_server)
+		return;
+
 	/*
 	 * Because -- for now -- we share the rt bandwidth, we need to
 	 * account our runtime there too, otherwise actual rt tasks
@@ -1415,15 +1422,31 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
 
 void dl_server_start(struct sched_dl_entity *dl_se)
 {
+	struct rq *rq = dl_se->rq;
+
 	if (!dl_server(dl_se)) {
+		/* Disabled */
+		dl_se->dl_runtime = 0;
+		dl_se->dl_deadline = 1000 * NSEC_PER_MSEC;
+		dl_se->dl_period = 1000 * NSEC_PER_MSEC;
+
 		dl_se->dl_server = 1;
 		setup_new_dl_entity(dl_se);
 	}
+
+	if (!dl_se->dl_runtime)
+		return;
+
 	enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
+	if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
+		resched_curr(dl_se->rq);
 }
 
 void dl_server_stop(struct sched_dl_entity *dl_se)
 {
+	if (!dl_se->dl_runtime)
+		return;
+
 	dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 03be0d1330a6..304697a80e9e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6722,6 +6722,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	 */
 	util_est_enqueue(&rq->cfs, p);
 
+	if (!rq->cfs.h_nr_running)
+		dl_server_start(&rq->fair_server);
+
 	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
 	 * utilization updates, so do it here explicitly with the IOWAIT flag
@@ -6866,6 +6869,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		rq->next_balance = jiffies;
 
 dequeue_throttle:
+	if (!rq->cfs.h_nr_running)
+		dl_server_stop(&rq->fair_server);
+
 	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
 }
@@ -8538,6 +8544,25 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq)
 	return pick_next_task_fair(rq, NULL, NULL);
 }
 
+static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
+{
+	return !!dl_se->rq->cfs.nr_running;
+}
+
+static struct task_struct *fair_server_pick(struct sched_dl_entity *dl_se)
+{
+	return pick_next_task_fair(dl_se->rq, NULL, NULL);
+}
+
+void fair_server_init(struct rq *rq)
+{
+	struct sched_dl_entity *dl_se = &rq->fair_server;
+
+	init_dl_entity(dl_se);
+
+	dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick);
+}
+
 /*
  * Account for a descheduled task:
  */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d2242679239e..205e56929e15 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -340,6 +340,8 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 		    dl_server_has_tasks_f has_tasks,
 		    dl_server_pick_f pick);
 
+extern void fair_server_init(struct rq *rq);
+
 #ifdef CONFIG_CGROUP_SCHED
 
 struct cfs_rq;
@@ -1016,6 +1018,8 @@ struct rq {
 	struct rt_rq		rt;
 	struct dl_rq		dl;
 
+	struct sched_dl_entity	fair_server;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this CPU: */
 	struct list_head	leaf_cfs_rq_list;
-- 
2.44.0
Re: [PATCH V6 1/6] sched/fair: Add trivial fair server
Posted by Peter Zijlstra 1 year, 10 months ago
On Fri, Apr 05, 2024 at 07:28:00PM +0200, Daniel Bristot de Oliveira wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> 
> Use deadline servers to service fair tasks.
> 
> This patch adds a fair_server deadline entity which acts as a container
> for fair entities and can be used to fix starvation when higher priority
> (wrt fair) tasks are monopolizing CPU(s).
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
> ---
>  kernel/sched/core.c     | 24 ++++++++++++++++--------
>  kernel/sched/deadline.c | 23 +++++++++++++++++++++++
>  kernel/sched/fair.c     | 25 +++++++++++++++++++++++++
>  kernel/sched/sched.h    |  4 ++++
>  4 files changed, 68 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 7019a40457a6..04e2270487b7 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6007,6 +6007,14 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
>  #endif
>  
>  	put_prev_task(rq, prev);
> +
> +	/*
> +	 * We've updated @prev and no longer need the server link, clear it.
> +	 * Must be done before ->pick_next_task() because that can (re)set
> +	 * ->dl_server.
> +	 */
> +	if (prev->dl_server)
> +		prev->dl_server = NULL;
>  }
>  
>  /*
> @@ -6037,6 +6045,13 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>  			p = pick_next_task_idle(rq);
>  		}
>  
> +		/*
> +		 * This is a normal CFS pick, but the previous could be a DL pick.
> +		 * Clear it as previous is no longer picked.
> +		 */
> +		if (prev->dl_server)
> +			prev->dl_server = NULL;
> +
>  		/*
>  		 * This is the fast path; it cannot be a DL server pick;
>  		 * therefore even if @p == @prev, ->dl_server must be NULL.
> @@ -6050,14 +6065,6 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>  restart:
>  	put_prev_task_balance(rq, prev, rf);
>  
> -	/*
> -	 * We've updated @prev and no longer need the server link, clear it.
> -	 * Must be done before ->pick_next_task() because that can (re)set
> -	 * ->dl_server.
> -	 */
> -	if (prev->dl_server)
> -		prev->dl_server = NULL;
> -
>  	for_each_class(class) {
>  		p = class->pick_next_task(rq);
>  		if (p)

This bit seems like a fix for 63ba8422f876 ("sched/deadline: Introduce
deadline servers"), should it be a separate patch?
Re: [PATCH V6 1/6] sched/fair: Add trivial fair server
Posted by Daniel Bristot de Oliveira 1 year, 10 months ago
On 4/10/24 19:24, Peter Zijlstra wrote:
> On Fri, Apr 05, 2024 at 07:28:00PM +0200, Daniel Bristot de Oliveira wrote:
>> From: Peter Zijlstra <peterz@infradead.org>
>>
>> Use deadline servers to service fair tasks.
>>
>> This patch adds a fair_server deadline entity which acts as a container
>> for fair entities and can be used to fix starvation when higher priority
>> (wrt fair) tasks are monopolizing CPU(s).
>>
>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
>> ---
>>  kernel/sched/core.c     | 24 ++++++++++++++++--------
>>  kernel/sched/deadline.c | 23 +++++++++++++++++++++++
>>  kernel/sched/fair.c     | 25 +++++++++++++++++++++++++
>>  kernel/sched/sched.h    |  4 ++++
>>  4 files changed, 68 insertions(+), 8 deletions(-)
>>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 7019a40457a6..04e2270487b7 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -6007,6 +6007,14 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
>>  #endif
>>  
>>  	put_prev_task(rq, prev);
>> +
>> +	/*
>> +	 * We've updated @prev and no longer need the server link, clear it.
>> +	 * Must be done before ->pick_next_task() because that can (re)set
>> +	 * ->dl_server.
>> +	 */
>> +	if (prev->dl_server)
>> +		prev->dl_server = NULL;
>>  }
>>  
>>  /*
>> @@ -6037,6 +6045,13 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>>  			p = pick_next_task_idle(rq);
>>  		}
>>  
>> +		/*
>> +		 * This is a normal CFS pick, but the previous could be a DL pick.
>> +		 * Clear it as previous is no longer picked.
>> +		 */
>> +		if (prev->dl_server)
>> +			prev->dl_server = NULL;
>> +
>>  		/*
>>  		 * This is the fast path; it cannot be a DL server pick;
>>  		 * therefore even if @p == @prev, ->dl_server must be NULL.
>> @@ -6050,14 +6065,6 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>>  restart:
>>  	put_prev_task_balance(rq, prev, rf);
>>  
>> -	/*
>> -	 * We've updated @prev and no longer need the server link, clear it.
>> -	 * Must be done before ->pick_next_task() because that can (re)set
>> -	 * ->dl_server.
>> -	 */
>> -	if (prev->dl_server)
>> -		prev->dl_server = NULL;
>> -
>>  	for_each_class(class) {
>>  		p = class->pick_next_task(rq);
>>  		if (p)
> 
> This bit seems like a fix for 63ba8422f876 ("sched/deadline: Introduce
> deadline servers"), should it be a separate patch?

It was actually reported in a separated patch as a pre-review of this series. So I
think you can pick them as fixes already from there, and add the fixes tag?

https://lore.kernel.org/lkml/20240313012451.1693807-2-joel@joelfernandes.org/
https://lore.kernel.org/lkml/20240313012451.1693807-3-joel@joelfernandes.org/

Also add the Reviewed-by me...

-- Daniel