From: Connor O'Brien <connoro@google.com>
Add a helper to find the runnable owner down a chain of blocked waiters
This patch was broken out from a larger chain migration
patch originally by Connor O'Brien.
Cc: Joel Fernandes <joelaf@google.com>
Cc: Qais Yousef <qyousef@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Zimuzo Ezeozue <zezeozue@google.com>
Cc: Youssef Esmat <youssefesmat@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Metin Kaya <Metin.Kaya@arm.com>
Cc: Xuewen Yan <xuewen.yan94@gmail.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-team@android.com
Signed-off-by: Connor O'Brien <connoro@google.com>
[jstultz: split out from larger chain migration patch]
Signed-off-by: John Stultz <jstultz@google.com>
---
kernel/sched/core.c | 42 +++++++++++++++++++++++++++++++++++++++++
kernel/sched/cpupri.c | 11 ++++++++---
kernel/sched/deadline.c | 15 +++++++++++++--
kernel/sched/rt.c | 9 ++++++++-
kernel/sched/sched.h | 10 ++++++++++
5 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0c212dcd4b7a..77a79d5f829a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3896,6 +3896,48 @@ static void activate_blocked_entities(struct rq *target_rq,
}
raw_spin_unlock_irqrestore(&owner->blocked_lock, flags);
}
+
+static inline bool task_queued_on_rq(struct rq *rq, struct task_struct *task)
+{
+ if (!task_on_rq_queued(task))
+ return false;
+ smp_rmb();
+ if (task_rq(task) != rq)
+ return false;
+ smp_rmb();
+ if (!task_on_rq_queued(task))
+ return false;
+ return true;
+}
+
+/*
+ * Returns the unblocked task at the end of the blocked chain starting with p
+ * if that chain is composed entirely of tasks enqueued on rq, or NULL otherwise.
+ */
+struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
+{
+ struct task_struct *exec_ctx, *owner;
+ struct mutex *mutex;
+
+ if (!sched_proxy_exec())
+ return p;
+
+ lockdep_assert_rq_held(rq);
+
+ for (exec_ctx = p; task_is_blocked(exec_ctx) && !task_on_cpu(rq, exec_ctx);
+ exec_ctx = owner) {
+ mutex = exec_ctx->blocked_on;
+ owner = __mutex_owner(mutex);
+ if (owner == exec_ctx)
+ break;
+
+ if (!task_queued_on_rq(rq, owner) || task_current_selected(rq, owner)) {
+ exec_ctx = NULL;
+ break;
+ }
+ }
+ return exec_ctx;
+}
#else /* !CONFIG_SCHED_PROXY_EXEC */
static inline void do_activate_task(struct rq *rq, struct task_struct *p,
int en_flags)
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 15e947a3ded7..53be78afdd07 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -96,12 +96,17 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
if (skip)
return 0;
- if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
+ if ((p && cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids) ||
+ (!p && cpumask_any(vec->mask) >= nr_cpu_ids))
return 0;
if (lowest_mask) {
- cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
- cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
+ if (p) {
+ cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
+ cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
+ } else {
+ cpumask_copy(lowest_mask, vec->mask);
+ }
/*
* We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 999bd17f11c4..21e56ac58e32 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1866,6 +1866,8 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
{
+ struct task_struct *exec_ctx;
+
/*
* Current can't be migrated, useless to reschedule,
* let's hope p can move out.
@@ -1874,12 +1876,16 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
!cpudl_find(&rq->rd->cpudl, rq_selected(rq), rq->curr, NULL))
return;
+ exec_ctx = find_exec_ctx(rq, p);
+ if (task_current(rq, exec_ctx))
+ return;
+
/*
* p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else.
*/
if (p->nr_cpus_allowed != 1 &&
- cpudl_find(&rq->rd->cpudl, p, p, NULL))
+ cpudl_find(&rq->rd->cpudl, p, exec_ctx, NULL))
return;
resched_curr(rq);
@@ -2169,12 +2175,17 @@ static int find_later_rq(struct task_struct *sched_ctx, struct task_struct *exec
/* Locks the rq it finds */
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
{
+ struct task_struct *exec_ctx;
struct rq *later_rq = NULL;
int tries;
int cpu;
for (tries = 0; tries < DL_MAX_TRIES; tries++) {
- cpu = find_later_rq(task, task);
+ exec_ctx = find_exec_ctx(rq, task);
+ if (!exec_ctx)
+ break;
+
+ cpu = find_later_rq(task, exec_ctx);
if ((cpu == -1) || (cpu == rq->cpu))
break;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 6371b0fca4ad..f8134d062fa3 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1640,6 +1640,11 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
!cpupri_find(&rq->rd->cpupri, rq_selected(rq), rq->curr, NULL))
return;
+ /* No reason to preempt since rq->curr wouldn't change anyway */
+ exec_ctx = find_exec_ctx(rq, p);
+ if (task_current(rq, exec_ctx))
+ return;
+
/*
* p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else.
@@ -1933,12 +1938,14 @@ static int find_lowest_rq(struct task_struct *sched_ctx, struct task_struct *exe
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
+ struct task_struct *exec_ctx;
struct rq *lowest_rq = NULL;
int tries;
int cpu;
for (tries = 0; tries < RT_MAX_TRIES; tries++) {
- cpu = find_lowest_rq(task, task);
+ exec_ctx = find_exec_ctx(rq, task);
+ cpu = find_lowest_rq(task, exec_ctx);
if ((cpu == -1) || (cpu == rq->cpu))
break;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef3d327e267c..6cd473224cfe 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3564,6 +3564,16 @@ int task_is_pushable(struct rq *rq, struct task_struct *p, int cpu)
return 0;
}
+
+#ifdef CONFIG_SCHED_PROXY_EXEC
+struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p);
+#else /* !CONFIG_SCHED_PROXY_EXEC */
+static inline
+struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
+{
+ return p;
+}
+#endif /* CONFIG_SCHED_PROXY_EXEC */
#endif
#endif /* _KERNEL_SCHED_SCHED_H */
--
2.43.0.472.g3155946c3a-goog
On 20/12/2023 12:18 am, John Stultz wrote:
> From: Connor O'Brien <connoro@google.com>
>
> Add a helper to find the runnable owner down a chain of blocked waiters
>
> This patch was broken out from a larger chain migration
> patch originally by Connor O'Brien.
>
> Cc: Joel Fernandes <joelaf@google.com>
> Cc: Qais Yousef <qyousef@google.com>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Juri Lelli <juri.lelli@redhat.com>
> Cc: Vincent Guittot <vincent.guittot@linaro.org>
> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Cc: Valentin Schneider <vschneid@redhat.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Ben Segall <bsegall@google.com>
> Cc: Zimuzo Ezeozue <zezeozue@google.com>
> Cc: Youssef Esmat <youssefesmat@google.com>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> Cc: Metin Kaya <Metin.Kaya@arm.com>
> Cc: Xuewen Yan <xuewen.yan94@gmail.com>
> Cc: K Prateek Nayak <kprateek.nayak@amd.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: kernel-team@android.com
> Signed-off-by: Connor O'Brien <connoro@google.com>
> [jstultz: split out from larger chain migration patch]
> Signed-off-by: John Stultz <jstultz@google.com>
> ---
> kernel/sched/core.c | 42 +++++++++++++++++++++++++++++++++++++++++
> kernel/sched/cpupri.c | 11 ++++++++---
> kernel/sched/deadline.c | 15 +++++++++++++--
> kernel/sched/rt.c | 9 ++++++++-
> kernel/sched/sched.h | 10 ++++++++++
> 5 files changed, 81 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 0c212dcd4b7a..77a79d5f829a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3896,6 +3896,48 @@ static void activate_blocked_entities(struct rq *target_rq,
> }
> raw_spin_unlock_irqrestore(&owner->blocked_lock, flags);
> }
> +
> +static inline bool task_queued_on_rq(struct rq *rq, struct task_struct *task)
> +{
> + if (!task_on_rq_queued(task))
> + return false;
> + smp_rmb();
> + if (task_rq(task) != rq)
> + return false;
> + smp_rmb();
> + if (!task_on_rq_queued(task))
> + return false;
* Super-nit: we may want to have empty lines between `if` blocks and
before/after `smp_rmb()` calls.
* I did not understand why we call `task_on_rq_queued(task)` twice.
Should we have an explanatory comment before the function definition?
> + return true;
> +}
> +
> +/*
> + * Returns the unblocked task at the end of the blocked chain starting with p
> + * if that chain is composed entirely of tasks enqueued on rq, or NULL otherwise.
> + */
> +struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
> +{
> + struct task_struct *exec_ctx, *owner;
> + struct mutex *mutex;
> +
> + if (!sched_proxy_exec())
> + return p;
> +
> + lockdep_assert_rq_held(rq);
> +
> + for (exec_ctx = p; task_is_blocked(exec_ctx) && !task_on_cpu(rq, exec_ctx);
> + exec_ctx = owner) {
> + mutex = exec_ctx->blocked_on;
> + owner = __mutex_owner(mutex);
> + if (owner == exec_ctx)
> + break;
> +
> + if (!task_queued_on_rq(rq, owner) || task_current_selected(rq, owner)) {
> + exec_ctx = NULL;
> + break;
> + }
> + }
> + return exec_ctx;
> +}
> #else /* !CONFIG_SCHED_PROXY_EXEC */
> static inline void do_activate_task(struct rq *rq, struct task_struct *p,
> int en_flags)
> diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
> index 15e947a3ded7..53be78afdd07 100644
> --- a/kernel/sched/cpupri.c
> +++ b/kernel/sched/cpupri.c
> @@ -96,12 +96,17 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
> if (skip)
> return 0;
>
> - if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
> + if ((p && cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids) ||
> + (!p && cpumask_any(vec->mask) >= nr_cpu_ids))
> return 0;
>
> if (lowest_mask) {
> - cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
> - cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
> + if (p) {
> + cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
> + cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
> + } else {
> + cpumask_copy(lowest_mask, vec->mask);
> + }
I think the changes in `cpupri.c` should be part of the previous patch
(`sched: Push execution and scheduler context split into deadline and rt
paths`). They don't seem to be related to find_exec_ctx(), do they?
>
> /*
> * We have to ensure that we have at least one bit
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 999bd17f11c4..21e56ac58e32 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1866,6 +1866,8 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
>
> static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
> {
> + struct task_struct *exec_ctx;
> +
> /*
> * Current can't be migrated, useless to reschedule,
> * let's hope p can move out.
> @@ -1874,12 +1876,16 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
> !cpudl_find(&rq->rd->cpudl, rq_selected(rq), rq->curr, NULL))
> return;
>
> + exec_ctx = find_exec_ctx(rq, p);
> + if (task_current(rq, exec_ctx))
> + return;
> +
> /*
> * p is migratable, so let's not schedule it and
> * see if it is pushed or pulled somewhere else.
> */
> if (p->nr_cpus_allowed != 1 &&
> - cpudl_find(&rq->rd->cpudl, p, p, NULL))
> + cpudl_find(&rq->rd->cpudl, p, exec_ctx, NULL))
> return;
>
> resched_curr(rq);
> @@ -2169,12 +2175,17 @@ static int find_later_rq(struct task_struct *sched_ctx, struct task_struct *exec
> /* Locks the rq it finds */
> static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
> {
> + struct task_struct *exec_ctx;
> struct rq *later_rq = NULL;
> int tries;
> int cpu;
>
> for (tries = 0; tries < DL_MAX_TRIES; tries++) {
> - cpu = find_later_rq(task, task);
> + exec_ctx = find_exec_ctx(rq, task);
> + if (!exec_ctx)
> + break;
> +
> + cpu = find_later_rq(task, exec_ctx);
>
Super-nit: this empty line should be removed to keep logically connected
lines closer.
The same for find_lock_lowest_rq().
> if ((cpu == -1) || (cpu == rq->cpu))
> break;
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 6371b0fca4ad..f8134d062fa3 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1640,6 +1640,11 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
> !cpupri_find(&rq->rd->cpupri, rq_selected(rq), rq->curr, NULL))
> return;
>
> + /* No reason to preempt since rq->curr wouldn't change anyway */
> + exec_ctx = find_exec_ctx(rq, p);
> + if (task_current(rq, exec_ctx))
> + return;
> +
> /*
> * p is migratable, so let's not schedule it and
> * see if it is pushed or pulled somewhere else.
> @@ -1933,12 +1938,14 @@ static int find_lowest_rq(struct task_struct *sched_ctx, struct task_struct *exe
> /* Will lock the rq it finds */
> static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
> {
> + struct task_struct *exec_ctx;
> struct rq *lowest_rq = NULL;
> int tries;
> int cpu;
>
> for (tries = 0; tries < RT_MAX_TRIES; tries++) {
> - cpu = find_lowest_rq(task, task);
> + exec_ctx = find_exec_ctx(rq, task);
> + cpu = find_lowest_rq(task, exec_ctx);
>
> if ((cpu == -1) || (cpu == rq->cpu))
> break;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index ef3d327e267c..6cd473224cfe 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -3564,6 +3564,16 @@ int task_is_pushable(struct rq *rq, struct task_struct *p, int cpu)
>
> return 0;
> }
> +
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p);
> +#else /* !CONFIG_SCHED_PROXY_EXEC */
> +static inline
> +struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
> +{
> + return p;
> +}
> +#endif /* CONFIG_SCHED_PROXY_EXEC */
> #endif
Nit: the `#ifdef CONFIG_SMP` block becomes bigger after this hunk, so we
should append `/* CONFIG_SMP */` to this `#endif`, IMHO.
>
> #endif /* _KERNEL_SCHED_SCHED_H */
On Fri, Dec 22, 2023 at 3:57 AM Metin Kaya <metin.kaya@arm.com> wrote:
> On 20/12/2023 12:18 am, John Stultz wrote:
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 0c212dcd4b7a..77a79d5f829a 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -3896,6 +3896,48 @@ static void activate_blocked_entities(struct rq *target_rq,
> > }
> > raw_spin_unlock_irqrestore(&owner->blocked_lock, flags);
> > }
> > +
> > +static inline bool task_queued_on_rq(struct rq *rq, struct task_struct *task)
> > +{
> > + if (!task_on_rq_queued(task))
> > + return false;
> > + smp_rmb();
> > + if (task_rq(task) != rq)
> > + return false;
> > + smp_rmb();
> > + if (!task_on_rq_queued(task))
> > + return false;
>
> * Super-nit: we may want to have empty lines between `if` blocks and
> before/after `smp_rmb()` calls.
Done.
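i.e. with the extra blank lines it would look something like this (roughly,
no functional change, just the spacing you suggested):

static inline bool task_queued_on_rq(struct rq *rq, struct task_struct *task)
{
        if (!task_on_rq_queued(task))
                return false;

        smp_rmb();

        if (task_rq(task) != rq)
                return false;

        smp_rmb();

        if (!task_on_rq_queued(task))
                return false;

        return true;
}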
> * I did not understand why we call `task_on_rq_queued(task)` twice.
> Should we have an explanatory comment before the function definition?
Yeah. Writing a better comment there is on my todo.
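Roughly, I'm thinking of something along these lines (exact wording very
much TBD, and this is just my rough reasoning for why the double check is
there):

/*
 * task_queued_on_rq() - Check whether the task is queued on this rq.
 *
 * We read these fields without holding the task's pi_lock or its
 * (possibly different) rq lock, so the task may concurrently be
 * migrating *toward* this rq: it can be dequeued elsewhere and have
 * its cpu pointed at this rq before it is actually enqueued here
 * (the enqueue itself can't happen while we hold this rq's lock).
 * Re-checking task_on_rq_queued() after the task_rq() read, with
 * read barriers ordering the loads, catches that window so we don't
 * mistake such an in-flight task for one already queued here.
 */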
> > diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
> > index 15e947a3ded7..53be78afdd07 100644
> > --- a/kernel/sched/cpupri.c
> > +++ b/kernel/sched/cpupri.c
> > @@ -96,12 +96,17 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
> > if (skip)
> > return 0;
> >
> > - if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
> > + if ((p && cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids) ||
> > + (!p && cpumask_any(vec->mask) >= nr_cpu_ids))
> > return 0;
> >
> > if (lowest_mask) {
> > - cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
> > - cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
> > + if (p) {
> > + cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
> > + cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
> > + } else {
> > + cpumask_copy(lowest_mask, vec->mask);
> > + }
>
> I think the changes in `cpupri.c` should be part of the previous patch
> (`sched: Push execution and scheduler context split into deadline and rt
> paths`). They don't seem to be related to find_exec_ctx(), do they?
So it's here only because find_exec_ctx() can return NULL, which means we
have to keep the NULL p checks here.
I'll think a bit about whether we can avoid that.
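(For example, one option might be to bail out at the call sites when the
chain walk comes back empty, so cpupri/cpudl never see a NULL task. The
check below is just an untested sketch for find_lowest_rq(); the
check_preempt_equal_* paths would need something similar.)

        /* in find_lowest_rq(), before consulting cpupri: */
        if (!exec_ctx)
                return -1;      /* no stable exec context; treat as "no cpu found" */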
> > @@ -2169,12 +2175,17 @@ static int find_later_rq(struct task_struct *sched_ctx, struct task_struct *exec
> > /* Locks the rq it finds */
> > static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
> > {
> > + struct task_struct *exec_ctx;
> > struct rq *later_rq = NULL;
> > int tries;
> > int cpu;
> >
> > for (tries = 0; tries < DL_MAX_TRIES; tries++) {
> > - cpu = find_later_rq(task, task);
> > + exec_ctx = find_exec_ctx(rq, task);
> > + if (!exec_ctx)
> > + break;
> > +
> > + cpu = find_later_rq(task, exec_ctx);
> >
>
> Super-nit: this empty line should be removed to keep logically connected
> lines closer.
Done.
> > +#ifdef CONFIG_SCHED_PROXY_EXEC
> > +struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p);
> > +#else /* !CONFIG_SCHED_PROXY_EXEC */
> > +static inline
> > +struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
> > +{
> > + return p;
> > +}
> > +#endif /* CONFIG_SCHED_PROXY_EXEC */
> > #endif
>
> Nit: the `#ifdef CONFIG_SMP` block becomes bigger after this hunk, so we
> should append `/* CONFIG_SMP */` to this `#endif`, IMHO.
>
Done.
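i.e. that `#endif` now reads:

#endif /* CONFIG_SMP */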
Thanks for the feedback!
-john