Use the new sched_class::switching_from() method to dequeue delayed
tasks before switching to another class.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
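Notes:

The delayed-dequeue was previously open-coded at every class-change
site; each caller carried this pattern (modulo DEQUEUE_NOCLOCK at the
ext.c sites):

	if (prev_class != next_class && p->se.sched_delayed)
		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);

With this patch sched_change_begin() invokes the class-specific hook
before the regular dequeue, roughly (sketch, per the core.c hunk below;
the guard itself comes from earlier patches in this series):

	sched_change_begin(p, flags)
	  -> p->sched_class->switching_from(rq, p, flags)	/* new */
	  -> dequeue_task(rq, p, flags), put_prev_task(rq, p)

Because the hook (and the dequeue behind it) assumes DEQUEUE_NOCLOCK,
the ext.c loops gain an explicit update_rq_clock() and carry
DEQUEUE_NOCLOCK in queue_flags.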
 kernel/sched/core.c     | 12 ++++++++----
 kernel/sched/ext.c      | 12 ++++--------
 kernel/sched/fair.c     |  7 +++++++
 kernel/sched/syscalls.c |  3 ---
 4 files changed, 19 insertions(+), 15 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7139,9 +7139,6 @@ void rt_mutex_setprio(struct task_struct
if (prev_class != next_class)
queue_flag |= DEQUEUE_CLASS;
- if (prev_class != next_class && p->se.sched_delayed)
- dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
scoped_guard (sched_change, p, queue_flag) {
/*
* Boosting condition are:
@@ -10530,8 +10527,15 @@ struct sched_change_ctx sched_change_beg
lockdep_assert_rq_held(rq);
- if ((flags & DEQUEUE_CLASS) && p->sched_class->switching_from)
+ if ((flags & DEQUEUE_CLASS) && p->sched_class->switching_from) {
+ /*
+ * switching_from_fair() assumes CLASS implies NOCLOCK; fixing
+ * this assumption would mean switching_from() would need to be
+ * able to change flags.
+ */
+ SCHED_WARN_ON(!(flags & DEQUEUE_NOCLOCK));
p->sched_class->switching_from(rq, p, flags);
+ }
struct sched_change_ctx ctx = {
.p = p,
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4485,7 +4485,7 @@ static void scx_ops_disable_workfn(struc
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
- unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
+ unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
@@ -4493,9 +4493,7 @@ static void scx_ops_disable_workfn(struc
if (old_class != new_class)
queue_flags |= DEQUEUE_CLASS;
- if (old_class != new_class && p->se.sched_delayed)
- dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
-
+ update_rq_clock(task_rq(p));
scoped_guard (sched_change, p, queue_flags) {
p->sched_class = new_class;
}
@@ -5202,7 +5200,7 @@ static int scx_ops_enable(struct sched_e
percpu_down_write(&scx_fork_rwsem);
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
- unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
+ unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
@@ -5210,9 +5208,7 @@ static int scx_ops_enable(struct sched_e
if (old_class != new_class)
queue_flags |= DEQUEUE_CLASS;
- if (old_class != new_class && p->se.sched_delayed)
- dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
-
+ update_rq_clock(task_rq(p));
scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
p->scx.slice = SCX_SLICE_DFL;
p->sched_class = new_class;
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -13175,6 +13175,12 @@ static void attach_task_cfs_rq(struct ta
attach_entity_cfs_rq(se);
}
+static void switching_from_fair(struct rq *rq, struct task_struct *p, int flags)
+{
+ if ((flags & DEQUEUE_CLASS) && p->se.sched_delayed)
+ dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
+}
+
static void switched_from_fair(struct rq *rq, struct task_struct *p, int flags)
{
detach_task_cfs_rq(p);
@@ -13592,6 +13598,7 @@ DEFINE_SCHED_CLASS(fair) = {
.reweight_task = reweight_task_fair,
.prio_changed = prio_changed_fair,
+ .switching_from = switching_from_fair,
.switched_from = switched_from_fair,
.switched_to = switched_to_fair,
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -701,9 +701,6 @@ int __sched_setscheduler(struct task_str
if (prev_class != next_class)
queue_flags |= DEQUEUE_CLASS;
- if (prev_class != next_class && p->se.sched_delayed)
- dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
-
scoped_guard (sched_change, p, queue_flags) {
if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
On Wed, Oct 30, 2024 at 04:13:01PM +0100, Peter Zijlstra wrote:
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -5202,7 +5200,7 @@ static int scx_ops_enable(struct sched_e
> 	percpu_down_write(&scx_fork_rwsem);
> 	scx_task_iter_start(&sti);
> 	while ((p = scx_task_iter_next_locked(&sti))) {
> -		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
> +		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
> 		const struct sched_class *old_class = p->sched_class;
> 		const struct sched_class *new_class =
> 			__setscheduler_class(p->policy, p->prio);
> @@ -5210,9 +5208,7 @@ static int scx_ops_enable(struct sched_e
> 		if (old_class != new_class)
> 			queue_flags |= DEQUEUE_CLASS;
>
> -		if (old_class != new_class && p->se.sched_delayed)
> -			dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
> -
> +		update_rq_clock(task_rq(p));
> 		scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
> 			p->scx.slice = SCX_SLICE_DFL;
> 			p->sched_class = new_class;

TJ, strictly speaking you should probably do a __balance_callbacks() in
__scx_task_irq_rq_unlock(). The various switched_from() methods like to
queue_balance_callback().

I think you're currently good by the fact that only RT/DL do this, and
you're not touching those in these loops.
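For reference, the balance-callback pattern mentioned above looks
roughly like this (abridged from kernel/sched/sched.h and
kernel/sched/rt.c; exact details vary by tree):

	struct balance_callback {
		struct balance_callback *next;
		void (*func)(struct rq *rq);
	};

	/* rt.c: switched_from_rt() wants to pull when the last RT task leaves */
	static DEFINE_PER_CPU(struct balance_callback, rt_pull_head);

	static void rt_queue_pull_task(struct rq *rq)
	{
		queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu),
				       pull_rt_task);
	}

	/* core.c: queued callbacks run, rq lock held, before the lock drops */
	static void __balance_callbacks(struct rq *rq)
	{
		do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
	}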
On Wed, Oct 30, 2024 at 04:45:51PM +0100, Peter Zijlstra wrote:
> TJ, strictly speaking you should probably do a __balance_callbacks() in
> __scx_task_irq_rq_unlock(). The various switched_from() methods like to
> queue_balance_callback().
>
> I think you're currently good by the fact that only RT/DL do this, and
> you're not touching those in these loops.

Ah, will send a patch soon. Thanks.

--
tejun
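For illustration, that follow-up might take roughly this shape
(hypothetical sketch only; __scx_task_irq_rq_unlock() is named in the
mail above, but its actual body and locking context may differ):

	static void __scx_task_irq_rq_unlock(struct rq_flags *rf, struct rq *rq)
	{
		/* run any balance callbacks queued by switched_from() */
		__balance_callbacks(rq);
		rq_unlock_irqrestore(rq, rf);	/* guessed; irq details may differ */
	}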