[v4] Add a deadline server for sched_ext tasks

[PATCH v4 14/15] Remove the ext server BW before changing tasks to FAIR

Posted by Joel Fernandes 3 months, 3 weeks ago

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
 kernel/sched/ext.c | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 84ccab8cb838..23e5711bc4fc 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4960,6 +4960,31 @@ static void scx_disable_workfn(struct kthread_work *work)
 
 	scx_init_task_enabled = false;
 
+	for_each_possible_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+		struct rq_flags rf;
+
+		/*
+		 * Invalidate all the rq clocks to prevent getting outdated
+		 * rq clocks from a previous scx scheduler.
+		 */
+		scx_rq_clock_invalidate(rq);
+
+		/*
+		 * We are unloading the sched_ext scheduler, we do not need its
+		 * DL server bandwidth anymore, remove it for all CPUs. Whenever
+		 * the first SCX task is enqueued (when scx is re-loaded), its DL
+		 * server bandwidth will be re-initialized.
+		 */
+		rq_lock_irqsave(rq, &rf);
+		update_rq_clock(rq);
+		if (dl_server_active(&rq->ext_server))
+			dl_server_stop(&rq->ext_server);
+		dl_server_remove_params(&rq->ext_server);
+		rq_unlock_irqrestore(rq, &rf);
+	}
+
+
 	scx_task_iter_start(&sti);
 	while ((p = scx_task_iter_next_locked(&sti))) {
 		const struct sched_class *old_class = p->sched_class;
@@ -4985,26 +5010,12 @@ static void scx_disable_workfn(struct kthread_work *work)
 
 	for_each_possible_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
-		struct rq_flags rf;
 
 		/*
 		 * Invalidate all the rq clocks to prevent getting outdated
 		 * rq clocks from a previous scx scheduler.
 		 */
 		scx_rq_clock_invalidate(rq);
-
-		/*
-		 * We are unloading the sched_ext scheduler, we do not need its
-		 * DL server bandwidth anymore, remove it for all CPUs. Whenever
-		 * the first SCX task is enqueued (when scx is re-loaded), its DL
-		 * server bandwidth will be re-initialized.
-		 */
-		rq_lock_irqsave(rq, &rf);
-		update_rq_clock(rq);
-		if (dl_server_active(&rq->ext_server))
-			dl_server_stop(&rq->ext_server);
-		dl_server_remove_params(&rq->ext_server);
-		rq_unlock_irqrestore(rq, &rf);
 	}
 
 	/* no task is on scx, turn off all the switches and flush in-progress calls */
-- 
2.43.0

Re: [PATCH v4 14/15] Remove the ext server BW before changing tasks to FAIR

Posted by Andrea Righi 3 months, 3 weeks ago

Hi Joel,

On Tue, Jun 17, 2025 at 04:05:17PM -0400, Joel Fernandes wrote:
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
>  kernel/sched/ext.c | 39 +++++++++++++++++++++++++--------------
>  1 file changed, 25 insertions(+), 14 deletions(-)
> 
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 84ccab8cb838..23e5711bc4fc 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -4960,6 +4960,31 @@ static void scx_disable_workfn(struct kthread_work *work)
>  
>  	scx_init_task_enabled = false;
>  
> +	for_each_possible_cpu(cpu) {
> +		struct rq *rq = cpu_rq(cpu);
> +		struct rq_flags rf;
> +
> +		/*
> +		 * Invalidate all the rq clocks to prevent getting outdated
> +		 * rq clocks from a previous scx scheduler.
> +		 */
> +		scx_rq_clock_invalidate(rq);

We're also calling scx_rq_clock_invalidate(rq) twice (see below).

> +
> +		/*
> +		 * We are unloading the sched_ext scheduler, we do not need its
> +		 * DL server bandwidth anymore, remove it for all CPUs. Whenever
> +		 * the first SCX task is enqueued (when scx is re-loaded), its DL
> +		 * server bandwidth will be re-initialized.
> +		 */
> +		rq_lock_irqsave(rq, &rf);
> +		update_rq_clock(rq);
> +		if (dl_server_active(&rq->ext_server))
> +			dl_server_stop(&rq->ext_server);
> +		dl_server_remove_params(&rq->ext_server);
> +		rq_unlock_irqrestore(rq, &rf);
> +	}
> +
> +
>  	scx_task_iter_start(&sti);
>  	while ((p = scx_task_iter_next_locked(&sti))) {
>  		const struct sched_class *old_class = p->sched_class;
> @@ -4985,26 +5010,12 @@ static void scx_disable_workfn(struct kthread_work *work)
>  
>  	for_each_possible_cpu(cpu) {
>  		struct rq *rq = cpu_rq(cpu);
> -		struct rq_flags rf;
>  
>  		/*
>  		 * Invalidate all the rq clocks to prevent getting outdated
>  		 * rq clocks from a previous scx scheduler.
>  		 */
>  		scx_rq_clock_invalidate(rq);
> -
> -		/*
> -		 * We are unloading the sched_ext scheduler, we do not need its
> -		 * DL server bandwidth anymore, remove it for all CPUs. Whenever
> -		 * the first SCX task is enqueued (when scx is re-loaded), its DL
> -		 * server bandwidth will be re-initialized.
> -		 */
> -		rq_lock_irqsave(rq, &rf);
> -		update_rq_clock(rq);
> -		if (dl_server_active(&rq->ext_server))
> -			dl_server_stop(&rq->ext_server);
> -		dl_server_remove_params(&rq->ext_server);
> -		rq_unlock_irqrestore(rq, &rf);
>  	}

We should probably remove this for_each_possible_cpu() completely.

>  
>  	/* no task is on scx, turn off all the switches and flush in-progress calls */
> -- 
> 2.43.0
> 

Thanks,
-Andrea

Re: [PATCH v4 14/15] Remove the ext server BW before changing tasks to FAIR

Posted by Joel Fernandes 3 months, 3 weeks ago


On 6/17/2025 4:42 PM, Andrea Righi wrote:
>> +
>> +		/*
>> +		 * We are unloading the sched_ext scheduler, we do not need its
>> +		 * DL server bandwidth anymore, remove it for all CPUs. Whenever
>> +		 * the first SCX task is enqueued (when scx is re-loaded), its DL
>> +		 * server bandwidth will be re-initialized.
>> +		 */
>> +		rq_lock_irqsave(rq, &rf);
>> +		update_rq_clock(rq);
>> +		if (dl_server_active(&rq->ext_server))
>> +			dl_server_stop(&rq->ext_server);
>> +		dl_server_remove_params(&rq->ext_server);
>> +		rq_unlock_irqrestore(rq, &rf);
>> +	}
>> +
>> +
>>  	scx_task_iter_start(&sti);
>>  	while ((p = scx_task_iter_next_locked(&sti))) {
>>  		const struct sched_class *old_class = p->sched_class;
>> @@ -4985,26 +5010,12 @@ static void scx_disable_workfn(struct kthread_work *work)
>>  
>>  	for_each_possible_cpu(cpu) {
>>  		struct rq *rq = cpu_rq(cpu);
>> -		struct rq_flags rf;
>>  
>>  		/*
>>  		 * Invalidate all the rq clocks to prevent getting outdated
>>  		 * rq clocks from a previous scx scheduler.
>>  		 */
>>  		scx_rq_clock_invalidate(rq);
>> -
>> -		/*
>> -		 * We are unloading the sched_ext scheduler, we do not need its
>> -		 * DL server bandwidth anymore, remove it for all CPUs. Whenever
>> -		 * the first SCX task is enqueued (when scx is re-loaded), its DL
>> -		 * server bandwidth will be re-initialized.
>> -		 */
>> -		rq_lock_irqsave(rq, &rf);
>> -		update_rq_clock(rq);
>> -		if (dl_server_active(&rq->ext_server))
>> -			dl_server_stop(&rq->ext_server);
>> -		dl_server_remove_params(&rq->ext_server);
>> -		rq_unlock_irqrestore(rq, &rf);
>>  	}
>
> We should probably remove this for_each_possible_cpu() completely.

Ah right, we only need to call scx_rq_clock_invalidate(rq); per its comment:

+		/*
+		 * Invalidate all the rq clocks to prevent getting outdated
+		 * rq clocks from a previous scx scheduler.
+		 */

So I'll get rid of it and run some tests, thanks!

 - Joel

Re: [PATCH v4 14/15] Remove the ext server BW before changing tasks to FAIR

Posted by Joel Fernandes 3 months, 3 weeks ago

Sigh, this patch was supposed to be squashed into the "Relinquish DL server
reservations when not needed".

I have updated it in my tree and also squashed the rq_clock patch into the same
to prevent conflicts.

https://git.kernel.org/pub/scm/linux/kernel/git/jfern/linux.git/log/?h=sched/scx-dlserver-boost-rebase

But its a minor thing, and we can review the v4 I sent.

thanks,

 - Joel

On 6/17/2025 4:05 PM, Joel Fernandes wrote:
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
>  kernel/sched/ext.c | 39 +++++++++++++++++++++++++--------------
>  1 file changed, 25 insertions(+), 14 deletions(-)
> 
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 84ccab8cb838..23e5711bc4fc 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -4960,6 +4960,31 @@ static void scx_disable_workfn(struct kthread_work *work)
>  
>  	scx_init_task_enabled = false;
>  
> +	for_each_possible_cpu(cpu) {
> +		struct rq *rq = cpu_rq(cpu);
> +		struct rq_flags rf;
> +
> +		/*
> +		 * Invalidate all the rq clocks to prevent getting outdated
> +		 * rq clocks from a previous scx scheduler.
> +		 */
> +		scx_rq_clock_invalidate(rq);
> +
> +		/*
> +		 * We are unloading the sched_ext scheduler, we do not need its
> +		 * DL server bandwidth anymore, remove it for all CPUs. Whenever
> +		 * the first SCX task is enqueued (when scx is re-loaded), its DL
> +		 * server bandwidth will be re-initialized.
> +		 */
> +		rq_lock_irqsave(rq, &rf);
> +		update_rq_clock(rq);
> +		if (dl_server_active(&rq->ext_server))
> +			dl_server_stop(&rq->ext_server);
> +		dl_server_remove_params(&rq->ext_server);
> +		rq_unlock_irqrestore(rq, &rf);
> +	}
> +
> +
>  	scx_task_iter_start(&sti);
>  	while ((p = scx_task_iter_next_locked(&sti))) {
>  		const struct sched_class *old_class = p->sched_class;
> @@ -4985,26 +5010,12 @@ static void scx_disable_workfn(struct kthread_work *work)
>  
>  	for_each_possible_cpu(cpu) {
>  		struct rq *rq = cpu_rq(cpu);
> -		struct rq_flags rf;
>  
>  		/*
>  		 * Invalidate all the rq clocks to prevent getting outdated
>  		 * rq clocks from a previous scx scheduler.
>  		 */
>  		scx_rq_clock_invalidate(rq);
> -
> -		/*
> -		 * We are unloading the sched_ext scheduler, we do not need its
> -		 * DL server bandwidth anymore, remove it for all CPUs. Whenever
> -		 * the first SCX task is enqueued (when scx is re-loaded), its DL
> -		 * server bandwidth will be re-initialized.
> -		 */
> -		rq_lock_irqsave(rq, &rf);
> -		update_rq_clock(rq);
> -		if (dl_server_active(&rq->ext_server))
> -			dl_server_stop(&rq->ext_server);
> -		dl_server_remove_params(&rq->ext_server);
> -		rq_unlock_irqrestore(rq, &rf);
>  	}
>  
>  	/* no task is on scx, turn off all the switches and flush in-progress calls */