[PATCH 04/12] sched: Cleanup sched_delayed handling for class switches

Posted by Peter Zijlstra 4 months ago
Use the new sched_class::switching_from() method to dequeue delayed
tasks before switching to another class.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
---
 kernel/sched/core.c     |   12 ++++++++----
 kernel/sched/ext.c      |    6 ------
 kernel/sched/fair.c     |    7 +++++++
 kernel/sched/syscalls.c |    3 ---
 4 files changed, 15 insertions(+), 13 deletions(-)
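
To make the ordering explicit: with this change the sched_change guard is what triggers the delayed-dequeue, by invoking the old class's switching_from() hook before the task leaves that class, instead of every call site open-coding the p->se.sched_delayed check. The following is a minimal user-space sketch of that flow, not kernel code; the names mirror the kernel's, but the types, the dequeue itself and the guard are simplified assumptions for illustration only.

	/*
	 * Minimal user-space model of the flow this patch sets up; NOT kernel
	 * code.  Names mirror the kernel's, everything else is simplified.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	struct rq;
	struct task_struct;

	struct sched_class {
		const char *name;
		/* called on the old class, before the class change */
		void (*switching_from)(struct rq *rq, struct task_struct *p);
		/* called on the new class, after the class change */
		void (*switched_to)(struct rq *rq, struct task_struct *p);
	};

	struct task_struct {
		const struct sched_class *sched_class;
		bool sched_delayed;	/* stands in for p->se.sched_delayed */
		bool queued;
	};

	struct rq { int cpu; };

	static void dequeue_task(struct rq *rq, struct task_struct *p)
	{
		p->queued = false;
		p->sched_delayed = false;
		puts("dequeue_task(): delayed task dropped by the old class");
	}

	/* fair's hook: clear a delayed task before leaving the class */
	static void switching_from_fair(struct rq *rq, struct task_struct *p)
	{
		if (p->sched_delayed)
			dequeue_task(rq, p);
	}

	static void switched_to_rt(struct rq *rq, struct task_struct *p)
	{
		printf("task now runs under the %s class\n", p->sched_class->name);
	}

	static const struct sched_class fair_sched_class = {
		.name		= "fair",
		.switching_from	= switching_from_fair,
	};

	static const struct sched_class rt_sched_class = {
		.name		= "rt",
		.switched_to	= switched_to_rt,
	};

	/* models sched_change_begin()/sched_change_end() around a class switch */
	static void change_class(struct rq *rq, struct task_struct *p,
				 const struct sched_class *new_class)
	{
		if (p->sched_class->switching_from)	/* begin: old class hook */
			p->sched_class->switching_from(rq, p);

		p->sched_class = new_class;		/* the guarded body */

		if (p->sched_class->switched_to)	/* end: new class hook */
			p->sched_class->switched_to(rq, p);
	}

	int main(void)
	{
		struct rq rq = { 0 };
		struct task_struct p = {
			.sched_class	= &fair_sched_class,
			.sched_delayed	= true,
			.queued		= true,
		};

		change_class(&rq, &p, &rt_sched_class);
		return 0;
	}

The upshot is that the delayed-dequeue policy lives with the fair class that owns sched_delayed, and class-changing call sites only need to request the change.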

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7368,9 +7368,6 @@ void rt_mutex_setprio(struct task_struct
 		queue_flag &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
 	}
 
-	if (prev_class != next_class && p->se.sched_delayed)
-		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
 	scoped_guard (sched_change, p, queue_flag) {
 		/*
 		 * Boosting condition are:
@@ -10845,8 +10842,15 @@ struct sched_change_ctx *sched_change_be
 		if (WARN_ON_ONCE(flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)))
 			flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
 
-		if (p->sched_class->switching_from)
+		if (p->sched_class->switching_from) {
+			/*
+			 * switching_from_fair() assumes CLASS implies NOCLOCK;
+			 * fixing this assumption would mean switching_from()
+			 * would need to be able to change flags.
+			 */
+			WARN_ON(!(flags & DEQUEUE_NOCLOCK));
 			p->sched_class->switching_from(rq, p);
+		}
 	}
 
 	*ctx = (struct sched_change_ctx){
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3924,9 +3924,6 @@ static void scx_disable_workfn(struct kt
 			queue_flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
 		}
 
-		if (old_class != new_class && p->se.sched_delayed)
-			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
 		scoped_guard (sched_change, p, queue_flags) {
 			p->sched_class = new_class;
 		}
@@ -4677,9 +4674,6 @@ static int scx_enable(struct sched_ext_o
 			queue_flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
 		}
 
-		if (old_class != new_class && p->se.sched_delayed)
-			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
 		scoped_guard (sched_change, p, queue_flags) {
 			p->scx.slice = SCX_SLICE_DFL;
 			p->sched_class = new_class;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -13237,6 +13237,12 @@ static void attach_task_cfs_rq(struct ta
 	attach_entity_cfs_rq(se);
 }
 
+static void switching_from_fair(struct rq *rq, struct task_struct *p)
+{
+	if (p->se.sched_delayed)
+		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
+}
+
 static void switched_from_fair(struct rq *rq, struct task_struct *p)
 {
 	detach_task_cfs_rq(p);
@@ -13638,6 +13644,7 @@ DEFINE_SCHED_CLASS(fair) = {
 
 	.reweight_task		= reweight_task_fair,
 	.prio_changed		= prio_changed_fair,
+	.switching_from		= switching_from_fair,
 	.switched_from		= switched_from_fair,
 	.switched_to		= switched_to_fair,
 
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -689,9 +689,6 @@ int __sched_setscheduler(struct task_str
 		queue_flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
 	}
 
-	if (prev_class != next_class && p->se.sched_delayed)
-		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
 	scoped_guard (sched_change, p, queue_flags) {
 
 		if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
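
On the WARN_ON(!(flags & DEQUEUE_NOCLOCK)) added to sched_change_begin() above: switching_from_fair() passes DEQUEUE_NOCLOCK straight to dequeue_task(), so it relies on whoever requested the DEQUEUE_CLASS change having already updated the rq clock and having said so via DEQUEUE_NOCLOCK. The snippet below is a self-contained model of that contract only; the flag values, helpers and the printed warning are stand-ins, not the kernel's definitions.

	#include <stdbool.h>
	#include <stdio.h>

	/* illustrative flag bits; values are made up for this model */
	#define DEQUEUE_SAVE	0x02
	#define DEQUEUE_MOVE	0x04
	#define DEQUEUE_NOCLOCK	0x08
	#define DEQUEUE_CLASS	0x80

	struct rq { unsigned long clock; };

	static void update_rq_clock(struct rq *rq)
	{
		rq->clock++;	/* pretend we sampled the real clock */
	}

	/* the check this patch adds to sched_change_begin() */
	static void sched_change_begin(struct rq *rq, unsigned int flags)
	{
		if (flags & DEQUEUE_CLASS) {
			if (!(flags & DEQUEUE_NOCLOCK))
				printf("WARNING: class change without DEQUEUE_NOCLOCK\n");
		}
	}

	/* models how a class-changing caller is expected to build its flags */
	static void change_task_class(struct rq *rq, bool class_changes)
	{
		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;

		update_rq_clock(rq);		/* clock is now up to date ...      */
		queue_flags |= DEQUEUE_NOCLOCK;	/* ... so tell everyone downstream  */

		if (class_changes) {
			queue_flags |= DEQUEUE_CLASS;
			queue_flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
		}

		sched_change_begin(rq, queue_flags);
		/* guarded class change and sched_change_end() elided */
	}

	int main(void)
	{
		struct rq rq = { 0 };

		change_task_class(&rq, true);	/* silent: contract honoured */
		sched_change_begin(&rq, DEQUEUE_CLASS);	/* trips the modelled warning */
		return 0;
	}
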
Re: [PATCH 04/12] sched: Cleanup sched_delayed handling for class switches
Posted by Vincent Guittot 4 months ago
On Mon, 6 Oct 2025 at 12:45, Peter Zijlstra <peterz@infradead.org> wrote:
>
> Use the new sched_class::switching_from() method to dequeue delayed
> tasks before switching to another class.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Acked-by: Tejun Heo <tj@kernel.org>

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

[tip: sched/core] sched: Cleanup sched_delayed handling for class switches
Posted by tip-bot2 for Peter Zijlstra 3 months, 3 weeks ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     1ae5f5dfe5adc64a90b1b0ab5bd9bd7c9d140c28
Gitweb:        https://git.kernel.org/tip/1ae5f5dfe5adc64a90b1b0ab5bd9bd7c9d140c28
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Wed, 30 Oct 2024 15:47:46 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 16 Oct 2025 11:13:51 +02:00

sched: Cleanup sched_delayed handling for class switches

Use the new sched_class::switching_from() method to dequeue delayed
tasks before switching to another class.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
---
 kernel/sched/core.c     | 12 ++++++++----
 kernel/sched/ext.c      |  6 ------
 kernel/sched/fair.c     |  7 +++++++
 kernel/sched/syscalls.c |  3 ---
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4dbd206..bd2c551 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7366,9 +7366,6 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 	if (prev_class != next_class)
 		queue_flag |= DEQUEUE_CLASS;
 
-	if (prev_class != next_class && p->se.sched_delayed)
-		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
 	scoped_guard (sched_change, p, queue_flag) {
 		/*
 		 * Boosting condition are:
@@ -10840,8 +10837,15 @@ struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int 
 	lockdep_assert_rq_held(rq);
 
 	if (flags & DEQUEUE_CLASS) {
-		if (p->sched_class->switching_from)
+		if (p->sched_class->switching_from) {
+			/*
+			 * switching_from_fair() assumes CLASS implies NOCLOCK;
+			 * fixing this assumption would mean switching_from()
+			 * would need to be able to change flags.
+			 */
+			WARN_ON(!(flags & DEQUEUE_NOCLOCK));
 			p->sched_class->switching_from(rq, p);
+		}
 	}
 
 	*ctx = (struct sched_change_ctx){
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index a408c39..b0a1e2a 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3922,9 +3922,6 @@ static void scx_disable_workfn(struct kthread_work *work)
 		if (old_class != new_class)
 			queue_flags |= DEQUEUE_CLASS;
 
-		if (old_class != new_class && p->se.sched_delayed)
-			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
 		scoped_guard (sched_change, p, queue_flags) {
 			p->sched_class = new_class;
 		}
@@ -4673,9 +4670,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 		if (old_class != new_class)
 			queue_flags |= DEQUEUE_CLASS;
 
-		if (old_class != new_class && p->se.sched_delayed)
-			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
 		scoped_guard (sched_change, p, queue_flags) {
 			p->scx.slice = SCX_SLICE_DFL;
 			p->sched_class = new_class;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ac881df..6c462e4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -13249,6 +13249,12 @@ static void attach_task_cfs_rq(struct task_struct *p)
 	attach_entity_cfs_rq(se);
 }
 
+static void switching_from_fair(struct rq *rq, struct task_struct *p)
+{
+	if (p->se.sched_delayed)
+		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
+}
+
 static void switched_from_fair(struct rq *rq, struct task_struct *p)
 {
 	detach_task_cfs_rq(p);
@@ -13650,6 +13656,7 @@ DEFINE_SCHED_CLASS(fair) = {
 
 	.reweight_task		= reweight_task_fair,
 	.prio_changed		= prio_changed_fair,
+	.switching_from		= switching_from_fair,
 	.switched_from		= switched_from_fair,
 	.switched_to		= switched_to_fair,
 
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index bcef5c7..6583faf 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -687,9 +687,6 @@ change:
 	if (prev_class != next_class)
 		queue_flags |= DEQUEUE_CLASS;
 
-	if (prev_class != next_class && p->se.sched_delayed)
-		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
 	scoped_guard (sched_change, p, queue_flags) {
 
 		if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {