kernel/sched/core.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-)
The following commit has been merged into the sched/core branch of tip:
Commit-ID: dfa0a574cbc47bfd5f8985f74c8ea003a37fa078
Gitweb: https://git.kernel.org/tip/dfa0a574cbc47bfd5f8985f74c8ea003a37fa078
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 05 Jun 2024 12:09:11 +02:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Sat, 17 Aug 2024 11:06:42 +02:00
sched/uclamp: Handle delayed dequeue
Delayed dequeue has tasks sit around on the runqueue that are not
actually runnable -- specifically, they will be dequeued the moment
they get picked.
One side-effect is that such a task can get migrated, which leads to a
'nested' dequeue_task() scenario that messes up uclamp if we don't
take care.
Notably, dequeue_task(DEQUEUE_SLEEP) can 'fail' and keep the task on
the runqueue. This however will have removed the task from uclamp --
per uclamp_rq_dec() in dequeue_task(). So far so good.
However, if at that point the task gets migrated -- or nice adjusted
or any of a myriad of operations that do a dequeue-enqueue cycle --
we'll pass through dequeue_task()/enqueue_task() again. Without
modification this will lead to a double decrement for uclamp, which is
wrong.
Reported-by: Luis Machado <luis.machado@arm.com>
Reported-by: Hongyan Xia <hongyan.xia2@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105029.315205425@infradead.org
---
kernel/sched/core.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7356464..80e639e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1691,6 +1691,9 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
if (unlikely(!p->sched_class->uclamp_enabled))
return;
+ if (p->se.sched_delayed)
+ return;
+
for_each_clamp_id(clamp_id)
uclamp_rq_inc_id(rq, p, clamp_id);
@@ -1715,6 +1718,9 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
if (unlikely(!p->sched_class->uclamp_enabled))
return;
+ if (p->se.sched_delayed)
+ return;
+
for_each_clamp_id(clamp_id)
uclamp_rq_dec_id(rq, p, clamp_id);
}
@@ -1994,8 +2000,12 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
}
- uclamp_rq_inc(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
+ /*
+ * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
+ * ->sched_delayed.
+ */
+ uclamp_rq_inc(rq, p);
if (sched_core_enabled(rq))
sched_core_enqueue(rq, p);
@@ -2017,6 +2027,10 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
psi_dequeue(p, flags & DEQUEUE_SLEEP);
}
+ /*
+ * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
+ * and mark the task ->sched_delayed.
+ */
uclamp_rq_dec(rq, p);
return p->sched_class->dequeue_task(rq, p, flags);
}
On 8/18/24 07:23, tip-bot2 for Peter Zijlstra wrote: > The following commit has been merged into the sched/core branch of tip: > > Commit-ID: dfa0a574cbc47bfd5f8985f74c8ea003a37fa078 > Gitweb: https://git.kernel.org/tip/dfa0a574cbc47bfd5f8985f74c8ea003a37fa078 > Author: Peter Zijlstra <peterz@infradead.org> > AuthorDate: Wed, 05 Jun 2024 12:09:11 +02:00 > Committer: Peter Zijlstra <peterz@infradead.org> > CommitterDate: Sat, 17 Aug 2024 11:06:42 +02:00 > > sched/uclamg: Handle delayed dequeue Nit, but I haven't seen the typo until now. > > Delayed dequeue has tasks sit around on the runqueue that are not > actually runnable -- specifically, they will be dequeued the moment > they get picked. > > One side-effect is that such a task can get migrated, which leads to a > 'nested' dequeue_task() scenario that messes up uclamp if we don't > take care. > > Notably, dequeue_task(DEQUEUE_SLEEP) can 'fail' and keep the task on > the runqueue. This however will have removed the task from uclamp -- > per uclamp_rq_dec() in dequeue_task(). So far so good. > > However, if at that point the task gets migrated -- or nice adjusted > or any of a myriad of operations that does a dequeue-enqueue cycle -- > we'll pass through dequeue_task()/enqueue_task() again. Without > modification this will lead to a double decrement for uclamp, which is > wrong. 
> > Reported-by: Luis Machado <luis.machado@arm.com> > Reported-by: Hongyan Xia <hongyan.xia2@arm.com> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > Reviewed-by: Valentin Schneider <vschneid@redhat.com> > Tested-by: Valentin Schneider <vschneid@redhat.com> > Link: https://lkml.kernel.org/r/20240727105029.315205425@infradead.org > --- > kernel/sched/core.c | 16 +++++++++++++++- > 1 file changed, 15 insertions(+), 1 deletion(-) > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c > index 7356464..80e639e 100644 > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -1691,6 +1691,9 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) > if (unlikely(!p->sched_class->uclamp_enabled)) > return; > > + if (p->se.sched_delayed) > + return; > + > for_each_clamp_id(clamp_id) > uclamp_rq_inc_id(rq, p, clamp_id); > > @@ -1715,6 +1718,9 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) > if (unlikely(!p->sched_class->uclamp_enabled)) > return; > > + if (p->se.sched_delayed) > + return; > + > for_each_clamp_id(clamp_id) > uclamp_rq_dec_id(rq, p, clamp_id); > } > @@ -1994,8 +2000,12 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags) > psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED)); > } > > - uclamp_rq_inc(rq, p); > p->sched_class->enqueue_task(rq, p, flags); > + /* > + * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear > + * ->sched_delayed. > + */ > + uclamp_rq_inc(rq, p); > > if (sched_core_enabled(rq)) > sched_core_enqueue(rq, p); > @@ -2017,6 +2027,10 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags) > psi_dequeue(p, flags & DEQUEUE_SLEEP); > } > > + /* > + * Must be before ->dequeue_task() because ->dequeue_task() can 'fail' > + * and mark the task ->sched_delayed. > + */ > uclamp_rq_dec(rq, p); > return p->sched_class->dequeue_task(rq, p, flags); > } >
© 2016 - 2026 Red Hat, Inc.