As proposed a long while ago -- and half done by scx -- wrap the
scheduler's 'change' pattern in a guard helper.
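
The conversion replaces the open-coded dequeue/put + enqueue/set
sequence with a scoped guard. Roughly (illustrative sketch; the real
call sites follow in the diff below):

	/* before */
	queued = task_on_rq_queued(p);
	running = task_current_donor(rq, p);
	if (queued)
		dequeue_task(rq, p, queue_flags);
	if (running)
		put_prev_task(rq, p);

	/* ... change p's scheduling properties ... */

	if (queued)
		enqueue_task(rq, p, queue_flags);
	if (running)
		set_next_task(rq, p);

	/* after */
	scoped_guard (sched_change, p, queue_flags) {
		/* ... change p's scheduling properties ... */
	}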
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/cleanup.h |   5 +
 kernel/sched/core.c     | 158 ++++++++++++++++++------------------------------
 kernel/sched/ext.c      |  33 +++-------
 kernel/sched/sched.h    |  21 +++---
 kernel/sched/syscalls.c |  65 ++++++-------------
 5 files changed, 112 insertions(+), 170 deletions(-)
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -297,6 +297,11 @@ static inline class_##_name##_t class_##
#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \
static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
+#define DEFINE_CLASS_IS_UNCONDITIONAL(_name) \
+ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
+ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
+ { return (void *)1; }
+
#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7099,7 +7099,7 @@ void rt_mutex_post_schedule(void)
*/
void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
{
- int prio, oldprio, queued, running, queue_flag =
+ int prio, oldprio, queue_flag =
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
const struct sched_class *prev_class, *next_class;
struct rq_flags rf;
@@ -7164,52 +7164,42 @@ void rt_mutex_setprio(struct task_struct
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
- if (queued)
- dequeue_task(rq, p, queue_flag);
- if (running)
- put_prev_task(rq, p);
-
- /*
- * Boosting condition are:
- * 1. -rt task is running and holds mutex A
- * --> -dl task blocks on mutex A
- *
- * 2. -dl task is running and holds mutex A
- * --> -dl task blocks on mutex A and could preempt the
- * running task
- */
- if (dl_prio(prio)) {
- if (!dl_prio(p->normal_prio) ||
- (pi_task && dl_prio(pi_task->prio) &&
- dl_entity_preempt(&pi_task->dl, &p->dl))) {
- p->dl.pi_se = pi_task->dl.pi_se;
- queue_flag |= ENQUEUE_REPLENISH;
+ scoped_guard (sched_change, p, queue_flag) {
+ /*
+ * Boosting conditions are:
+ * 1. -rt task is running and holds mutex A
+ * --> -dl task blocks on mutex A
+ *
+ * 2. -dl task is running and holds mutex A
+ * --> -dl task blocks on mutex A and could preempt the
+ * running task
+ */
+ if (dl_prio(prio)) {
+ if (!dl_prio(p->normal_prio) ||
+ (pi_task && dl_prio(pi_task->prio) &&
+ dl_entity_preempt(&pi_task->dl, &p->dl))) {
+ p->dl.pi_se = pi_task->dl.pi_se;
+ scope.flags |= ENQUEUE_REPLENISH;
+ } else {
+ p->dl.pi_se = &p->dl;
+ }
+ } else if (rt_prio(prio)) {
+ if (dl_prio(oldprio))
+ p->dl.pi_se = &p->dl;
+ if (oldprio < prio)
+ scope.flags |= ENQUEUE_HEAD;
} else {
- p->dl.pi_se = &p->dl;
+ if (dl_prio(oldprio))
+ p->dl.pi_se = &p->dl;
+ if (rt_prio(oldprio))
+ p->rt.timeout = 0;
}
- } else if (rt_prio(prio)) {
- if (dl_prio(oldprio))
- p->dl.pi_se = &p->dl;
- if (oldprio < prio)
- queue_flag |= ENQUEUE_HEAD;
- } else {
- if (dl_prio(oldprio))
- p->dl.pi_se = &p->dl;
- if (rt_prio(oldprio))
- p->rt.timeout = 0;
- }
-
- p->sched_class = next_class;
- p->prio = prio;
- check_class_changing(rq, p, prev_class);
+ p->sched_class = next_class;
+ p->prio = prio;
- if (queued)
- enqueue_task(rq, p, queue_flag);
- if (running)
- set_next_task(rq, p);
+ check_class_changing(rq, p, prev_class);
+ }
check_class_changed(rq, p, prev_class, oldprio);
out_unlock:
@@ -7819,26 +7809,9 @@ int migrate_task_to(struct task_struct *
*/
void sched_setnuma(struct task_struct *p, int nid)
{
- bool queued, running;
- struct rq_flags rf;
- struct rq *rq;
-
- rq = task_rq_lock(p, &rf);
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
-
- if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE);
- if (running)
- put_prev_task(rq, p);
-
- p->numa_preferred_nid = nid;
-
- if (queued)
- enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
- if (running)
- set_next_task(rq, p);
- task_rq_unlock(rq, p, &rf);
+ guard(task_rq_lock)(p);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE)
+ p->numa_preferred_nid = nid;
}
#endif /* CONFIG_NUMA_BALANCING */
@@ -8957,9 +8930,10 @@ static void sched_change_group(struct ta
*/
void sched_move_task(struct task_struct *tsk)
{
- int queued, running, queue_flags =
+ unsigned int queue_flags =
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct task_group *group;
+ bool resched = false;
struct rq *rq;
CLASS(task_rq_lock, rq_guard)(tsk);
@@ -8975,21 +8949,14 @@ void sched_move_task(struct task_struct
update_rq_clock(rq);
- running = task_current_donor(rq, tsk);
- queued = task_on_rq_queued(tsk);
+ scoped_guard (sched_change, tsk, queue_flags) {
+ sched_change_group(tsk, group);
+ scx_move_task(tsk);
+ if (scope.running)
+ resched = true;
+ }
- if (queued)
- dequeue_task(rq, tsk, queue_flags);
- if (running)
- put_prev_task(rq, tsk);
-
- sched_change_group(tsk, group);
- scx_move_task(tsk);
-
- if (queued)
- enqueue_task(rq, tsk, queue_flags);
- if (running) {
- set_next_task(rq, tsk);
+ if (resched) {
/*
* After changing group, the running task may have joined a
* throttled one but it's still the running task. Trigger a
@@ -10580,37 +10547,34 @@ void sched_mm_cid_fork(struct task_struc
}
#endif
-#ifdef CONFIG_SCHED_CLASS_EXT
-void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
- struct sched_enq_and_set_ctx *ctx)
+struct sched_change_ctx sched_change_begin(struct task_struct *p, unsigned int flags)
{
struct rq *rq = task_rq(p);
-
- lockdep_assert_rq_held(rq);
-
- *ctx = (struct sched_enq_and_set_ctx){
+ struct sched_change_ctx ctx = {
.p = p,
- .queue_flags = queue_flags,
+ .flags = flags,
.queued = task_on_rq_queued(p),
.running = task_current(rq, p),
};
- update_rq_clock(rq);
- if (ctx->queued)
- dequeue_task(rq, p, queue_flags | DEQUEUE_NOCLOCK);
- if (ctx->running)
+ lockdep_assert_rq_held(rq);
+
+ if (ctx.queued)
+ dequeue_task(rq, p, flags);
+ if (ctx.running)
put_prev_task(rq, p);
+
+ return ctx;
}
-void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
+void sched_change_end(struct sched_change_ctx ctx)
{
- struct rq *rq = task_rq(ctx->p);
+ struct rq *rq = task_rq(ctx.p);
lockdep_assert_rq_held(rq);
- if (ctx->queued)
- enqueue_task(rq, ctx->p, ctx->queue_flags | ENQUEUE_NOCLOCK);
- if (ctx->running)
- set_next_task(rq, ctx->p);
+ if (ctx.queued)
+ enqueue_task(rq, ctx.p, ctx.flags | ENQUEUE_NOCLOCK);
+ if (ctx.running)
+ set_next_task(rq, ctx.p);
}
-#endif /* CONFIG_SCHED_CLASS_EXT */
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4355,11 +4355,10 @@ static void scx_ops_bypass(bool bypass)
*/
list_for_each_entry_safe_reverse(p, n, &rq->scx.runnable_list,
scx.runnable_node) {
- struct sched_enq_and_set_ctx ctx;
-
/* cycling deq/enq is enough, see the function comment */
- sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
- sched_enq_and_set_task(&ctx);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ /* nothing */ ;
+ }
}
rq_unlock_irqrestore(rq, &rf);
@@ -4491,17 +4490,14 @@ static void scx_ops_disable_workfn(struc
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
- struct sched_enq_and_set_ctx ctx;
if (old_class != new_class && p->se.sched_delayed)
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
- sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
-
- p->sched_class = new_class;
- check_class_changing(task_rq(p), p, old_class);
-
- sched_enq_and_set_task(&ctx);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ p->sched_class = new_class;
+ check_class_changing(task_rq(p), p, old_class);
+ }
check_class_changed(task_rq(p), p, old_class, p->prio);
scx_ops_exit_task(p);
@@ -5206,18 +5202,15 @@ static int scx_ops_enable(struct sched_e
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
- struct sched_enq_and_set_ctx ctx;
if (old_class != new_class && p->se.sched_delayed)
- dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
-
- sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
-
- p->scx.slice = SCX_SLICE_DFL;
- p->sched_class = new_class;
- check_class_changing(task_rq(p), p, old_class);
+ dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
- sched_enq_and_set_task(&ctx);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ p->scx.slice = SCX_SLICE_DFL;
+ p->sched_class = new_class;
+ check_class_changing(task_rq(p), p, old_class);
+ }
check_class_changed(task_rq(p), p, old_class, p->prio);
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3921,23 +3921,22 @@ static inline void balance_callbacks(str
#endif
-#ifdef CONFIG_SCHED_CLASS_EXT
-/*
- * Used by SCX in the enable/disable paths to move tasks between sched_classes
- * and establish invariants.
- */
-struct sched_enq_and_set_ctx {
+struct sched_change_ctx {
struct task_struct *p;
- int queue_flags;
+ unsigned int flags;
bool queued;
bool running;
};
-void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
- struct sched_enq_and_set_ctx *ctx);
-void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
+struct sched_change_ctx sched_change_begin(struct task_struct *p, unsigned int flags);
+void sched_change_end(struct sched_change_ctx ctx);
-#endif /* CONFIG_SCHED_CLASS_EXT */
+DEFINE_CLASS(sched_change, struct sched_change_ctx,
+ sched_change_end(_T),
+ sched_change_begin(p, flags),
+ struct task_struct *p, unsigned int flags)
+
+DEFINE_CLASS_IS_UNCONDITIONAL(sched_change)
#include "ext.h"
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -64,7 +64,6 @@ static int effective_prio(struct task_st
void set_user_nice(struct task_struct *p, long nice)
{
- bool queued, running;
struct rq *rq;
int old_prio;
@@ -90,22 +89,12 @@ void set_user_nice(struct task_struct *p
return;
}
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
- if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
- if (running)
- put_prev_task(rq, p);
-
- p->static_prio = NICE_TO_PRIO(nice);
- set_load_weight(p, true);
- old_prio = p->prio;
- p->prio = effective_prio(p);
-
- if (queued)
- enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
- if (running)
- set_next_task(rq, p);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) {
+ p->static_prio = NICE_TO_PRIO(nice);
+ set_load_weight(p, true);
+ old_prio = p->prio;
+ p->prio = effective_prio(p);
+ }
/*
* If the task increased its priority or is running and
@@ -528,7 +517,7 @@ int __sched_setscheduler(struct task_str
bool user, bool pi)
{
int oldpolicy = -1, policy = attr->sched_policy;
- int retval, oldprio, newprio, queued, running;
+ int retval, oldprio, newprio;
const struct sched_class *prev_class, *next_class;
struct balance_callback *head;
struct rq_flags rf;
@@ -712,33 +701,25 @@ int __sched_setscheduler(struct task_str
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
- if (queued)
- dequeue_task(rq, p, queue_flags);
- if (running)
- put_prev_task(rq, p);
-
- if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
- __setscheduler_params(p, attr);
- p->sched_class = next_class;
- p->prio = newprio;
- }
- __setscheduler_uclamp(p, attr);
- check_class_changing(rq, p, prev_class);
+ scoped_guard (sched_change, p, queue_flags) {
- if (queued) {
- /*
- * We enqueue to tail when the priority of a task is
- * increased (user space view).
- */
- if (oldprio < p->prio)
- queue_flags |= ENQUEUE_HEAD;
+ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
+ __setscheduler_params(p, attr);
+ p->sched_class = next_class;
+ p->prio = newprio;
+ }
+ __setscheduler_uclamp(p, attr);
+ check_class_changing(rq, p, prev_class);
- enqueue_task(rq, p, queue_flags);
+ if (scope.queued) {
+ /*
+ * We enqueue to tail when the priority of a task is
+ * increased (user space view).
+ */
+ if (oldprio < p->prio)
+ scope.flags |= ENQUEUE_HEAD;
+ }
}
- if (running)
- set_next_task(rq, p);
check_class_changed(rq, p, prev_class, oldprio);
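
(For reference: given the class definition added to sched.h above, the
scoped_guard (sched_change, ...) blocks are roughly equivalent to the
following open-coded form -- a sketch, not literal macro expansion:

	{
		struct sched_change_ctx scope =
			sched_change_begin(p, queue_flags);
		/*
		 * The body runs with p dequeued and no longer the
		 * current task; scope.queued and scope.running record
		 * the prior state, and the body may OR ENQUEUE_* bits
		 * into scope.flags before the implicit re-enqueue.
		 */
		sched_change_end(scope);
	}

ENQUEUE_NOCLOCK is added unconditionally by sched_change_end().)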
On Wed, Oct 30, 2024 at 04:12:57PM +0100, Peter Zijlstra wrote:
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
...
> @@ -5206,18 +5202,15 @@ static int scx_ops_enable(struct sched_e
>  		const struct sched_class *old_class = p->sched_class;
>  		const struct sched_class *new_class =
>  			__setscheduler_class(p->policy, p->prio);
> -		struct sched_enq_and_set_ctx ctx;
> 
>  		if (old_class != new_class && p->se.sched_delayed)
> -			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
> -
> -		sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
> -
> -		p->scx.slice = SCX_SLICE_DFL;
> -		p->sched_class = new_class;
> -		check_class_changing(task_rq(p), p, old_class);
> +			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
> 
> -		sched_enq_and_set_task(&ctx);
> +		scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
> +			p->scx.slice = SCX_SLICE_DFL;
> +			p->sched_class = new_class;
> +			check_class_changing(task_rq(p), p, old_class);
> +		}
> 
>  		check_class_changed(task_rq(p), p, old_class, p->prio);
>  	}

I get the following from the missing update_rq_clock():

rq->clock_update_flags < RQCF_ACT_SKIP
WARNING: CPU: 2 PID: 1692 at kernel/sched/sched.h:1647 update_load_avg+0x7c3/0x8c0
Modules linked in:
CPU: 2 UID: 0 PID: 1692 Comm: runner Not tainted 6.12.0-rc5-work-00336-g9bfae8f5ca65-dirty #515
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS unknown 2/2/2022
Sched_ext: maximal (enabling+all)
RIP: 0010:update_load_avg+0x7c3/0x8c0
Code: 00 4c 2b bb c8 01 00 00 40 f6 c5 02 0f 84 e7 f8 ff ff e9 fa f8 ff ff c6 05 28 1f 81 02 01 48 c7 c7 f9 c5 dd 82 e8 1d 04 fb ff <0f> 0b e9 aa f8 ff ff 0f 0b 41 83 be f0 0c 00 00 01 0f 86 8d f8 ff
RSP: 0018:ffffc900003c7c60 EFLAGS: 00010086
RAX: 0000000000000026 RBX: ffff88810163d400 RCX: 0000000000000027
RDX: 0000000000000002 RSI: 00000000ffffdfff RDI: ffff888237c9b448
RBP: 0000000000000000 R08: 0000000000001fff R09: ffffffff8368dff0
R10: 0000000000005ffd R11: 0000000000000004 R12: ffffffff82edb890
R13: ffff888100398080 R14: ffff888237c30180 R15: ffff888100398000
FS:  00007f850b4006c0(0000) GS:ffff888237c80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f84fc000020 CR3: 0000000103bfa000 CR4: 0000000000750eb0
PKRU: 55555554
Call Trace:
 <TASK>
 detach_task_cfs_rq+0x31/0xf0
 check_class_changed+0x29/0x70
 bpf_scx_reg+0xa72/0xc30
 bpf_struct_ops_link_create+0xf8/0x140
 __sys_bpf+0x348/0x510
 __x64_sys_bpf+0x18/0x20
 do_syscall_64+0x7b/0x140
 ? exc_page_fault+0x6b/0xb0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f850c0551fd
Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e3 fa 0c 00 f7 d8 64 89 01 48
RSP: 002b:00007f850b3ffba8 EFLAGS: 00000202 ORIG_RAX: 0000000000000141
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f850c0551fd
RDX: 0000000000000040 RSI: 00007f850b3ffdc0 RDI: 000000000000001c
RBP: 00007f850b3ffbd0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000202 R12: 00007f850b4006c0
R13: ffffffffffffff80 R14: 000000000000005f R15: 00007ffdf6c8de30
 </TASK>

The following patch fixes it.

Thanks.
---
 kernel/sched/ext.c | 4 ++++
 1 file changed, 4 insertions(+)

--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4496,6 +4496,8 @@ static void scx_ops_disable_workfn(struc
 		const struct sched_class *new_class =
 			__setscheduler_class(p->policy, p->prio);
 
+		update_rq_clock(task_rq(p));
+
 		if (old_class != new_class && p->se.sched_delayed)
 			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
 
@@ -5208,6 +5210,8 @@ static int scx_ops_enable(struct sched_e
 		const struct sched_class *new_class =
 			__setscheduler_class(p->policy, p->prio);
 
+		update_rq_clock(task_rq(p));
+
 		if (old_class != new_class && p->se.sched_delayed)
 			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
On Wed, Oct 30, 2024 at 10:58:40AM -1000, Tejun Heo wrote:
> On Wed, Oct 30, 2024 at 04:12:57PM +0100, Peter Zijlstra wrote:
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -4496,6 +4496,8 @@ static void scx_ops_disable_workfn(struc
>  		const struct sched_class *new_class =
>  			__setscheduler_class(p->policy, p->prio);
> 
> +		update_rq_clock(task_rq(p));
> +

Oh, this probably should be paired with DEQUEUE_NOCLOCK.

Thanks.

-- 
tejun
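
(Concretely, the pairing suggested above would look something like this
at the two scx loops -- a sketch, not a posted patch:

	update_rq_clock(task_rq(p));

	if (old_class != new_class && p->se.sched_delayed)
		dequeue_task(task_rq(p), p, DEQUEUE_SLEEP |
			     DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);

	scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE |
		      DEQUEUE_NOCLOCK) {
		/* ... */
	}

so the clock is sampled once and the subsequent dequeues skip the
redundant update.)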
On Wed, Oct 30, 2024 at 11:09:56AM -1000, Tejun Heo wrote:
> On Wed, Oct 30, 2024 at 10:58:40AM -1000, Tejun Heo wrote:
> > On Wed, Oct 30, 2024 at 04:12:57PM +0100, Peter Zijlstra wrote:
> > --- a/kernel/sched/ext.c
> > +++ b/kernel/sched/ext.c
> > @@ -4496,6 +4496,8 @@ static void scx_ops_disable_workfn(struc
> >  		const struct sched_class *new_class =
> >  			__setscheduler_class(p->policy, p->prio);
> > 
> > +		update_rq_clock(task_rq(p));
> > +
> 
> Oh, this probably should be paired with DEQUEUE_NOCLOCK.

Yeah, it is like that at the end of the series. I'll shuffle things around
and make it so earlier.

Thanks!
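
(Presumably "like that" means the clock update eventually moves into the
helper itself. A hypothetical sketch of that shape -- the actual later
patch in the series may differ:

	struct sched_change_ctx sched_change_begin(struct task_struct *p,
						   unsigned int flags)
	{
		struct rq *rq = task_rq(p);

		lockdep_assert_rq_held(rq);

		if (!(flags & DEQUEUE_NOCLOCK)) {
			update_rq_clock(rq);
			flags |= DEQUEUE_NOCLOCK; /* hypothetical: skip re-updates */
		}

		/* ... dequeue/put as in the posted version ... */
	}

With that, callers never need a bare update_rq_clock() before the
guard.)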