Add {DE,EN}QUEUE_CLASS and fold the sched_class::switch* methods into
the change pattern. This completes and makes the pattern more
symmetric.
This changes the order of callbacks slightly:
|
| switching_from()
dequeue_task(); | dequeue_task()
put_prev_task(); | put_prev_task()
| switched_from()
|
... change task ... | ... change task ...
|
switching_to(); | switching_to()
enqueue_task(); | enqueue_task()
set_next_task(); | set_next_task()
prev_class->switched_from() |
switched_to() | switched_to()
|
Notably, it moves the switched_from() callback right after the
dequeue/put. Existing implementations don't appear to be affected by
this change in location -- specifically the task isn't enqueued on the
class in question in either location.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
kernel/sched/core.c | 61 ++++++++++++++++++++---------------------------
kernel/sched/ext.c | 19 ++++++++++----
kernel/sched/idle.c | 4 +--
kernel/sched/rt.c | 4 +--
kernel/sched/sched.h | 21 ++++++----------
kernel/sched/stop_task.c | 4 +--
kernel/sched/syscalls.c | 7 +++--
7 files changed, 59 insertions(+), 61 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2102,34 +2102,9 @@ inline int task_curr(const struct task_s
return cpu_curr(task_cpu(p)) == p;
}
-/*
- * ->switching_to() is called with the pi_lock and rq_lock held and must not
- * mess with locking.
- */
-void check_class_changing(struct rq *rq, struct task_struct *p,
- const struct sched_class *prev_class)
+void check_prio_changed(struct rq *rq, struct task_struct *p, int oldprio)
{
- if (prev_class != p->sched_class && p->sched_class->switching_to)
- p->sched_class->switching_to(rq, p);
-}
-
-/*
- * switched_from, switched_to and prio_changed must _NOT_ drop rq->lock,
- * use the balance_callback list if you want balancing.
- *
- * this means any call to check_class_changed() must be followed by a call to
- * balance_callback().
- */
-void check_class_changed(struct rq *rq, struct task_struct *p,
- const struct sched_class *prev_class,
- int oldprio)
-{
- if (prev_class != p->sched_class) {
- if (prev_class->switched_from)
- prev_class->switched_from(rq, p);
-
- p->sched_class->switched_to(rq, p);
- } else if (oldprio != p->prio || dl_task(p))
+ if (oldprio != p->prio || dl_task(p))
p->sched_class->prio_changed(rq, p, oldprio);
}
@@ -7161,6 +7136,9 @@ void rt_mutex_setprio(struct task_struct
prev_class = p->sched_class;
next_class = __setscheduler_class(p->policy, prio);
+ if (prev_class != next_class)
+ queue_flag |= DEQUEUE_CLASS;
+
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
@@ -7197,11 +7175,10 @@ void rt_mutex_setprio(struct task_struct
p->sched_class = next_class;
p->prio = prio;
-
- check_class_changing(rq, p, prev_class);
}
- check_class_changed(rq, p, prev_class, oldprio);
+ if (!(queue_flag & DEQUEUE_CLASS))
+ check_prio_changed(rq, p, oldprio);
out_unlock:
/* Avoid rq from going away on us: */
preempt_disable();
@@ -10550,6 +10527,12 @@ void sched_mm_cid_fork(struct task_struc
struct sched_change_ctx sched_change_begin(struct task_struct *p, unsigned int flags)
{
struct rq *rq = task_rq(p);
+
+ lockdep_assert_rq_held(rq);
+
+ if ((flags & DEQUEUE_CLASS) && p->sched_class->switching_from)
+ p->sched_class->switching_from(rq, p);
+
struct sched_change_ctx ctx = {
.p = p,
.flags = flags,
@@ -10557,24 +10540,32 @@ struct sched_change_ctx sched_change_beg
.running = task_current(rq, p),
};
- lockdep_assert_rq_held(rq);
-
if (ctx.queued)
dequeue_task(rq, p, flags);
if (ctx.running)
put_prev_task(rq, p);
+ if ((flags & DEQUEUE_CLASS) && p->sched_class->switched_from)
+ p->sched_class->switched_from(rq, p);
+
return ctx;
}
void sched_change_end(struct sched_change_ctx ctx)
{
- struct rq *rq = task_rq(ctx.p);
+ struct task_struct *p = ctx.p;
+ struct rq *rq = task_rq(p);
lockdep_assert_rq_held(rq);
+ if ((ctx.flags & ENQUEUE_CLASS) && p->sched_class->switching_to)
+ p->sched_class->switching_to(rq, p);
+
if (ctx.queued)
- enqueue_task(rq, ctx.p, ctx.flags | ENQUEUE_NOCLOCK);
+ enqueue_task(rq, p, ctx.flags | ENQUEUE_NOCLOCK);
if (ctx.running)
- set_next_task(rq, ctx.p);
+ set_next_task(rq, p);
+
+ if ((ctx.flags & ENQUEUE_CLASS) && p->sched_class->switched_to)
+ p->sched_class->switched_to(rq, p);
}
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4487,19 +4487,24 @@ static void scx_ops_disable_workfn(struc
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
+ unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
+ if (old_class != new_class)
+ queue_flags |= DEQUEUE_CLASS;
+
if (old_class != new_class && p->se.sched_delayed)
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
- scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ scoped_guard (sched_change, p, queue_flags) {
p->sched_class = new_class;
- check_class_changing(task_rq(p), p, old_class);
}
- check_class_changed(task_rq(p), p, old_class, p->prio);
+ if (!(queue_flags & DEQUEUE_CLASS))
+ check_prio_changed(task_rq(p), p, p->prio);
+
scx_ops_exit_task(p);
}
scx_task_iter_stop(&sti);
@@ -5199,20 +5204,24 @@ static int scx_ops_enable(struct sched_e
percpu_down_write(&scx_fork_rwsem);
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
+ unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
+ if (old_class != new_class)
+ queue_flags |= DEQUEUE_CLASS;
+
if (old_class != new_class && p->se.sched_delayed)
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
p->scx.slice = SCX_SLICE_DFL;
p->sched_class = new_class;
- check_class_changing(task_rq(p), p, old_class);
}
- check_class_changed(task_rq(p), p, old_class, p->prio);
+ if (!(queue_flags & DEQUEUE_CLASS))
+ check_prio_changed(task_rq(p), p, p->prio);
}
scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -494,7 +494,7 @@ static void task_tick_idle(struct rq *rq
{
}
-static void switched_to_idle(struct rq *rq, struct task_struct *p)
+static void switching_to_idle(struct rq *rq, struct task_struct *p)
{
BUG();
}
@@ -534,6 +534,6 @@ DEFINE_SCHED_CLASS(idle) = {
.task_tick = task_tick_idle,
.prio_changed = prio_changed_idle,
- .switched_to = switched_to_idle,
+ .switched_to = switching_to_idle,
.update_curr = update_curr_idle,
};
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2633,7 +2633,6 @@ DEFINE_SCHED_CLASS(rt) = {
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
.task_woken = task_woken_rt,
- .switched_from = switched_from_rt,
.find_lock_rq = find_lock_lowest_rq,
#endif
@@ -2641,8 +2640,9 @@ DEFINE_SCHED_CLASS(rt) = {
.get_rr_interval = get_rr_interval_rt,
- .prio_changed = prio_changed_rt,
+ .switched_from = switched_from_rt,
.switched_to = switched_to_rt,
+ .prio_changed = prio_changed_rt,
.update_curr = update_curr_rt,
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2340,6 +2340,7 @@ extern const u32 sched_prio_to_wmult[40
#define DEQUEUE_MIGRATING 0x10 /* Matches ENQUEUE_MIGRATING */
#define DEQUEUE_DELAYED 0x20 /* Matches ENQUEUE_DELAYED */
+#define DEQUEUE_CLASS 0x40 /* Matches ENQUEUE_CLASS */
#define DEQUEUE_SPECIAL 0x0100
@@ -2350,6 +2351,7 @@ extern const u32 sched_prio_to_wmult[40
#define ENQUEUE_MIGRATING 0x10
#define ENQUEUE_DELAYED 0x20
+#define ENQUEUE_CLASS 0x40
#define ENQUEUE_HEAD 0x0100
#define ENQUEUE_REPLENISH 0x0200
@@ -2415,14 +2417,11 @@ struct sched_class {
void (*task_fork)(struct task_struct *p);
void (*task_dead)(struct task_struct *p);
- /*
- * The switched_from() call is allowed to drop rq->lock, therefore we
- * cannot assume the switched_from/switched_to pair is serialized by
- * rq->lock. They are however serialized by p->pi_lock.
- */
- void (*switching_to) (struct rq *this_rq, struct task_struct *task);
- void (*switched_from)(struct rq *this_rq, struct task_struct *task);
- void (*switched_to) (struct rq *this_rq, struct task_struct *task);
+ void (*switching_from)(struct rq *this_rq, struct task_struct *task);
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task);
+ void (*switching_to) (struct rq *this_rq, struct task_struct *task);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task);
+
void (*reweight_task)(struct rq *this_rq, struct task_struct *task,
const struct load_weight *lw);
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
@@ -3898,11 +3897,7 @@ extern void set_load_weight(struct task_
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
-extern void check_class_changing(struct rq *rq, struct task_struct *p,
- const struct sched_class *prev_class);
-extern void check_class_changed(struct rq *rq, struct task_struct *p,
- const struct sched_class *prev_class,
- int oldprio);
+extern void check_prio_changed(struct rq *rq, struct task_struct *p, int oldprio);
#ifdef CONFIG_SMP
extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -76,7 +76,7 @@ static void task_tick_stop(struct rq *rq
{
}
-static void switched_to_stop(struct rq *rq, struct task_struct *p)
+static void switching_to_stop(struct rq *rq, struct task_struct *p)
{
BUG(); /* its impossible to change to this class */
}
@@ -115,6 +115,6 @@ DEFINE_SCHED_CLASS(stop) = {
.task_tick = task_tick_stop,
.prio_changed = prio_changed_stop,
- .switched_to = switched_to_stop,
+ .switching_to = switching_to_stop,
.update_curr = update_curr_stop,
};
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -698,6 +698,9 @@ int __sched_setscheduler(struct task_str
prev_class = p->sched_class;
next_class = __setscheduler_class(policy, newprio);
+ if (prev_class != next_class)
+ queue_flags |= DEQUEUE_CLASS;
+
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
@@ -709,7 +712,6 @@ int __sched_setscheduler(struct task_str
p->prio = newprio;
}
__setscheduler_uclamp(p, attr);
- check_class_changing(rq, p, prev_class);
if (scope.queued) {
/*
@@ -721,7 +723,8 @@ int __sched_setscheduler(struct task_str
}
}
- check_class_changed(rq, p, prev_class, oldprio);
+ if (!(queue_flags & DEQUEUE_CLASS))
+ check_prio_changed(rq, p, oldprio);
/* Avoid rq from going away on us: */
preempt_disable();
On Wed, Oct 30, 2024 at 04:12:59PM +0100, Peter Zijlstra wrote: > --- a/kernel/sched/ext.c > +++ b/kernel/sched/ext.c > @@ -5199,20 +5204,24 @@ static int scx_ops_enable(struct sched_e > percpu_down_write(&scx_fork_rwsem); > scx_task_iter_start(&sti); > while ((p = scx_task_iter_next_locked(&sti))) { > + unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE; > const struct sched_class *old_class = p->sched_class; > const struct sched_class *new_class = > __setscheduler_class(p->policy, p->prio); > > + if (old_class != new_class) > + queue_flags |= DEQUEUE_CLASS; > + > if (old_class != new_class && p->se.sched_delayed) > dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); > > scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) { ^ queue_flags > p->scx.slice = SCX_SLICE_DFL; > p->sched_class = new_class; > - check_class_changing(task_rq(p), p, old_class); > } > > - check_class_changed(task_rq(p), p, old_class, p->prio); > + if (!(queue_flags & DEQUEUE_CLASS)) > + check_prio_changed(task_rq(p), p, p->prio); Maybe prio_changed can be moved into scoped_guard? Thanks. -- tejun
On Wed, Oct 30, 2024 at 11:12:32AM -1000, Tejun Heo wrote: > On Wed, Oct 30, 2024 at 04:12:59PM +0100, Peter Zijlstra wrote: > > --- a/kernel/sched/ext.c > > +++ b/kernel/sched/ext.c > > @@ -5199,20 +5204,24 @@ static int scx_ops_enable(struct sched_e > > percpu_down_write(&scx_fork_rwsem); > > scx_task_iter_start(&sti); > > while ((p = scx_task_iter_next_locked(&sti))) { > > + unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE; > > const struct sched_class *old_class = p->sched_class; > > const struct sched_class *new_class = > > __setscheduler_class(p->policy, p->prio); > > > > + if (old_class != new_class) > > + queue_flags |= DEQUEUE_CLASS; > > + > > if (old_class != new_class && p->se.sched_delayed) > > dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); > > > > scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) { > ^ > queue_flags > > > p->scx.slice = SCX_SLICE_DFL; > > p->sched_class = new_class; > > - check_class_changing(task_rq(p), p, old_class); > > } > > > > - check_class_changed(task_rq(p), p, old_class, p->prio); > > + if (!(queue_flags & DEQUEUE_CLASS)) > > + check_prio_changed(task_rq(p), p, p->prio); > > Maybe prio_changed can be moved into scoped_guard? It wasn't before -- do you have need for it to be inside?
On Wed, Oct 30, 2024 at 10:15:06PM +0100, Peter Zijlstra wrote: ... > > > + if (!(queue_flags & DEQUEUE_CLASS)) > > > + check_prio_changed(task_rq(p), p, p->prio); > > > > Maybe prio_changed can be moved into scoped_guard? > > It wasn't before -- do you have need for it to be inside? No, was just wondering whether that'd make things a bit more compact. Either way is fine. Thanks. -- tejun
On Wed, Oct 30, 2024 at 11:29:09AM -1000, Tejun Heo wrote: > On Wed, Oct 30, 2024 at 10:15:06PM +0100, Peter Zijlstra wrote: > ... > > > > + if (!(queue_flags & DEQUEUE_CLASS)) > > > > + check_prio_changed(task_rq(p), p, p->prio); > > > > > > Maybe prio_changed can be moved into scoped_guard? > > > > It wasn't before -- do you have need for it to be inside? > > No, was just wondering whether that'd make things a bit more compact. Either > way is fine. Oh, did you perhaps mean into sched_change_end() ? I suppose that's possible indeed. Initially I thought that would require yet another flags, but looking at it again, that doesn't seem to be the case. All sched_change users lacking it never change the prio anyway. I'll have a look at doing that tomorrow, with a slightly fresher brain. I also think that adding flags to the switch*() methods isn't at all needed, but perhaps it makes sense anyway.
Hello, On Wed, Oct 30, 2024 at 10:37:35PM +0100, Peter Zijlstra wrote: > On Wed, Oct 30, 2024 at 11:29:09AM -1000, Tejun Heo wrote: > > On Wed, Oct 30, 2024 at 10:15:06PM +0100, Peter Zijlstra wrote: > > ... > > > > > + if (!(queue_flags & DEQUEUE_CLASS)) > > > > > + check_prio_changed(task_rq(p), p, p->prio); > > > > > > > > Maybe prio_changed can be moved into scoped_guard? > > > > > > It wasn't before -- do you have need for it to be inside? > > > > No, was just wondering whether that'd make things a bit more compact. Either > > way is fine. > > Oh, did you perhaps mean into sched_change_end() ? I suppose that's > possible indeed. Initially I thought that would require yet another Oh yeah, that's what I meant. Sorry about not being clearer. > flags, but looking at it again, that doesn't seem to be the case. All > sched_change users lacking it never change the prio anyway. > > I'll have a look at doing that tomorrow, with a slightly fresher brain. > > I also think that adding flags to the switch*() methods isn't at all > needed, but perhaps it makes sense anyway. Fantastic. Thanks. -- tejun
© 2016 - 2024 Red Hat, Inc.