Add logic so try_to_wake_up() will notice if we are waking a task
where blocked_on == PROXY_WAKING and, if necessary, dequeue the task
so the wakeup will naturally return-migrate the donor task back to a
cpu it can run on.
This helps performance, as we do the dequeue and wakeup under the
locks normally taken in try_to_wake_up(), and it avoids having to do
proxy_force_return() from __schedule(), which has to re-take similar
locks and then force another pass through the pick-again loop.
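At a high level, the new flow looks roughly like this (simplified
sketch, not literal code; the function names match the diff below):
    try_to_wake_up(p)
      ttwu_runnable(p)
        proxy_needs_return(rq, p)
          __clear_task_blocked_on(p, PROXY_WAKING);
          block_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SPECIAL);
          /* p is dequeued, ttwu_runnable() returns 0 and ttwu
           * falls through to the normal wakeup path: */
      cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
      set_task_cpu(p, cpu);             /* the return-migration */
      ttwu_queue(p, cpu, wake_flags);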
This was split out from the larger proxy patch, and
significantly reworked.
Credits for the original patch go to:
Peter Zijlstra (Intel) <peterz@infradead.org>
Juri Lelli <juri.lelli@redhat.com>
Valentin Schneider <valentin.schneider@arm.com>
Connor O'Brien <connoro@google.com>
Signed-off-by: John Stultz <jstultz@google.com>
---
v24:
* Reworked proxy_needs_return() so it's less nested, as suggested
by K Prateek
* Switch to using block_task with DEQUEUE_SPECIAL as suggested
by K Prateek
* Fix edge case to reset wake_cpu if select_task_rq() chooses
the current rq and we skip set_task_cpu()
Cc: Joel Fernandes <joelagnelf@nvidia.com>
Cc: Qais Yousef <qyousef@layalina.io>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Zimuzo Ezeozue <zezeozue@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Metin Kaya <Metin.Kaya@arm.com>
Cc: Xuewen Yan <xuewen.yan94@gmail.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: kuyo chang <kuyo.chang@mediatek.com>
Cc: hupu <hupu.gm@gmail.com>
Cc: kernel-team@android.com
---
kernel/sched/core.c | 84 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 82 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fcf64c4db437e..e4a49c694dcd9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3697,6 +3697,64 @@ static inline void proxy_set_task_cpu(struct task_struct *p, int cpu)
__set_task_cpu(p, cpu);
p->wake_cpu = wake_cpu;
}
+
+static bool proxy_task_runnable_but_waking(struct task_struct *p)
+{
+ if (!sched_proxy_exec())
+ return false;
+ return (READ_ONCE(p->__state) == TASK_RUNNING &&
+ READ_ONCE(p->blocked_on) == PROXY_WAKING);
+}
+
+static inline struct task_struct *proxy_resched_idle(struct rq *rq);
+
+/*
+ * Checks to see if task p has been proxy-migrated to another rq
+ * and needs to be returned. If so, we deactivate the task here
+ * so that it can be properly woken up on the p->wake_cpu
+ * (or whichever cpu select_task_rq() picks at the bottom of
+ * try_to_wake_up()).
+ */
+static inline bool proxy_needs_return(struct rq *rq, struct task_struct *p)
+{
+ if (!sched_proxy_exec())
+ return false;
+
+ guard(raw_spinlock)(&p->blocked_lock);
+
+ /* If task isn't PROXY_WAKING, we don't need to do return migration */
+ if (p->blocked_on != PROXY_WAKING)
+ return false;
+
+ __clear_task_blocked_on(p, PROXY_WAKING);
+
+ /* If already current, don't need to return migrate */
+ if (task_current(rq, p))
+ return false;
+
+ /* If wake_cpu is targeting this cpu, don't bother return migrating */
+ if (p->wake_cpu == cpu_of(rq)) {
+ resched_curr(rq);
+ return false;
+ }
+
+ /* If we're return migrating the rq->donor, switch it out for idle */
+ if (task_current_donor(rq, p))
+ proxy_resched_idle(rq);
+
+ /* (ab)Use DEQUEUE_SPECIAL to ensure task is always blocked here. */
+ block_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SPECIAL);
+ return true;
+}
+#else /* !CONFIG_SCHED_PROXY_EXEC */
+static bool proxy_task_runnable_but_waking(struct task_struct *p)
+{
+ return false;
+}
+static inline bool proxy_needs_return(struct rq *rq, struct task_struct *p)
+{
+ return false;
+}
#endif /* CONFIG_SCHED_PROXY_EXEC */
static void
@@ -3784,6 +3842,8 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
update_rq_clock(rq);
if (p->se.sched_delayed)
enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
+ if (proxy_needs_return(rq, p))
+ goto out;
if (!task_on_cpu(rq, p)) {
/*
* When on_rq && !on_cpu the task is preempted, see if
@@ -3794,6 +3854,7 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
ttwu_do_wakeup(p);
ret = 1;
}
+out:
__task_rq_unlock(rq, &rf);
return ret;
@@ -4181,6 +4242,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* it disabling IRQs (this allows not taking ->pi_lock).
*/
WARN_ON_ONCE(p->se.sched_delayed);
+ /* If p is current, we know we can run here, so clear blocked_on */
+ clear_task_blocked_on(p, NULL);
if (!ttwu_state_match(p, state, &success))
goto out;
@@ -4197,8 +4260,15 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
smp_mb__after_spinlock();
- if (!ttwu_state_match(p, state, &success))
- break;
+ if (!ttwu_state_match(p, state, &success)) {
+ /*
+ * If we're already TASK_RUNNING and PROXY_WAKING,
+ * continue on to the ttwu_runnable() check to force
+ * proxy_needs_return() evaluation.
+ */
+ if (!proxy_task_runnable_but_waking(p))
+ break;
+ }
trace_sched_waking(p);
@@ -4305,6 +4375,16 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
wake_flags |= WF_MIGRATED;
psi_ttwu_dequeue(p);
set_task_cpu(p, cpu);
+ } else if (cpu != p->wake_cpu) {
+ /*
+ * If we were proxy-migrated to this cpu and
+ * select_task_rq() picks it instead of wake_cpu
+ * to return to, we won't call set_task_cpu(),
+ * leaving a stale wake_cpu pointing to where we
+ * proxy-migrated from. So just fix up wake_cpu
+ * here if it's not correct.
+ */
+ p->wake_cpu = cpu;
}
ttwu_queue(p, cpu, wake_flags);
--
2.52.0.487.g5c8c507ade-goog
Hello John,
On 11/25/2025 4:01 AM, John Stultz wrote:
> @@ -4197,8 +4260,15 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
> */
> scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
> smp_mb__after_spinlock();
> - if (!ttwu_state_match(p, state, &success))
> - break;
> + if (!ttwu_state_match(p, state, &success)) {
> + /*
> + * If we're already TASK_RUNNING and PROXY_WAKING,
> + * continue on to the ttwu_runnable() check to force
> + * proxy_needs_return() evaluation.
> + */
> + if (!proxy_task_runnable_but_waking(p))
> + break;
> + }
I don't like the fact that we have to go against the ttwu_state_match()
machinery here or the fact that proxy_deactivate() has to check for
"TASK_RUNNING" as a short-cut to detect wakeups. It makes all these bits
a bit more painful to maintain IMO.
Here is what I've learnt so far:
o Task can go into an interruptible sleep where a signal can cause the
task to give up its attempt to acquire the lock ("err" path in
__mutex_lock_common()) - This means we'll have to re-evaluate the
"p->blocked_on" by running it.
o If we weren't running with PROXY_EXEC, a task that was blocked
(including delayed) on a mutex would definitely receive a wakeup. With
proxy, the only difference is that the wakeup path will now see
"task_on_rq_queued()" and will have to go through ttwu_runnable() under
the rq_lock.
o Everything else is handled in __schedule() under the rq_lock again
and all paths that drop the rq_lock will go through a re-pick.
o Once we deactivate a donor (via proxy_deactivate()), the "blocked_on"
relation has no real use since we'll always be PROXY_WAKING when we
come back (or we have received a signal and that blocked_on relation
now needs a re-evaluation anyways)
I'm sure a bunch of these hold up for RWSEM too.
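To put that another way, the serialization I'm relying on is roughly
the following (just a sketch of the lifecycle as I understand it; the
names are from the diff below):
  /*
   * For a queued (preempted) task p:
   *
   *   p->blocked_on == mutex / PROXY_WAKING     (blocked donor)
   *        |
   *        |  wakeup: ttwu() -> ttwu_runnable() clears blocked_on,
   *        |  holding the rq_lock of the rq p is queued on
   *        v
   *   p->blocked_on == NULL && TASK_RUNNING     (plain runnable task)
   *
   * i.e. blocked_on only transitions to NULL under that rq_lock, so
   * checking it from __schedule()/find_proxy_task(), which also hold
   * the rq_lock, doesn't race with the wakeup.
   */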
With that in mind, we can actually generalize ttwu_runnable() and
proxy_needs_return() to clear "p->blocked_on", even for !PROXY_WAKING
since a wakeup implies we have to re-evaluate the "p->blocked_on".
We already have clear_task_blocked_on() within the rq_lock critical
section (except for when task is running) and we ensure blocked donors
cannot go TASK_RUNNING with task_is_blocked() != NULL.
This is what I had in mind based on top of commit d424d28ea93
("sched: Migrate whole chain in proxy_migrate_task()") on
"proxy-exec-v24-6.18-rc6" branch:
(lightly tested with sched-messaging; I haven't seen any splats
or hung-task warnings yet, but from my experience, it doesn't hold
any water once my suggestion meets the real world :-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0c50d154050a..cb567c219f04 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3698,15 +3698,14 @@ static inline void proxy_set_task_cpu(struct task_struct *p, int cpu)
p->wake_cpu = wake_cpu;
}
-static bool proxy_task_runnable_but_waking(struct task_struct *p)
+static inline void proxy_reset_donor(struct rq *rq)
{
- if (!sched_proxy_exec())
- return false;
- return (READ_ONCE(p->__state) == TASK_RUNNING &&
- READ_ONCE(p->blocked_on) == PROXY_WAKING);
-}
+ WARN_ON_ONCE(rq->donor == rq->curr);
-static inline struct task_struct *proxy_resched_idle(struct rq *rq);
+ put_prev_set_next_task(rq, rq->donor, rq->curr);
+ rq_set_donor(rq, rq->curr);
+ resched_curr(rq);
+}
/*
* Checks to see if task p has been proxy-migrated to another rq
@@ -3720,37 +3719,55 @@ static inline bool proxy_needs_return(struct rq *rq, struct task_struct *p)
if (!sched_proxy_exec())
return false;
- guard(raw_spinlock)(&p->blocked_lock);
-
- /* If task isn't PROXY_WAKING, we don't need to do return migration */
- if (p->blocked_on != PROXY_WAKING)
+ /*
+ * A preempted task isn't blocked on a mutex, let ttwu_runnable()
+ * handle the rest.
+ *
+ * Since we are under the rq_lock and "p->blocked_on" can only
+ * be transitioned to NULL under the rq_lock for a preempted task, it
+ * is safe to inspect the blocked_on relation outside the blocked_lock.
+ */
+ if (!task_current(rq, p) && !p->blocked_on)
return false;
- __clear_task_blocked_on(p, PROXY_WAKING);
+ guard(raw_spinlock)(&p->blocked_lock);
- /* If already current, don't need to return migrate */
- if (task_current(rq, p))
+ /* Task isn't blocked, let ttwu_runnable() handle the rest. */
+ if (!p->blocked_on)
return false;
+ /*
+ * Blocked task is waking up - either to grab a lock, or to
+ * handle a signal. Clear the blocked_on relation to run the
+ * task. The task will re-establish the blocked_on relation if
+ * it blocks on the lock again.
+ *
+ * A concurrent wakeup from the lock owner will not set
+ * PROXY_WAKING since we are already running. ttwu_state_match()
+ * under "p->pi_lock" will ensure there is no race.
+ */
+ __clear_task_blocked_on(p, NULL);
- /* If wake_cpu is targeting this cpu, don't bother return migrating */
- if (p->wake_cpu == cpu_of(rq)) {
- resched_curr(rq);
+ /* If already current, let ttwu_runnable() handle the rest. */
+ if (task_current(rq, p))
return false;
- }
- /* If we're return migrating the rq->donor, switch it out for idle */
+ /*
+ * If we're waking up the rq->donor, switch the context
+ * back to rq->curr to account the rest of the runtime to
+ * rq->curr and issue a resched for __schedule() to
+ * re-evaluate the context as soon as possible.
+ *
+ * Since we are under the rq_lock, it is safe to swap out the
+ * donor.
+ */
if (task_current_donor(rq, p))
- proxy_resched_idle(rq);
+ proxy_reset_donor(rq);
/* (ab)Use DEQUEUE_SPECIAL to ensure task is always blocked here. */
block_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SPECIAL);
return true;
}
#else /* !CONFIG_SCHED_PROXY_EXEC */
-static bool proxy_task_runnable_but_waking(struct task_struct *p)
-{
- return false;
-}
static inline bool proxy_needs_return(struct rq *rq, struct task_struct *p)
{
return false;
@@ -4260,15 +4277,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
smp_mb__after_spinlock();
- if (!ttwu_state_match(p, state, &success)) {
- /*
- * If we're already TASK_RUNNING, and PROXY_WAKING
- * continue on to ttwu_runnable check to force
- * proxy_needs_return evaluation
- */
- if (!proxy_task_runnable_but_waking(p))
- break;
- }
+ if (!ttwu_state_match(p, state, &success))
+ break;
trace_sched_waking(p);
@@ -6634,12 +6644,15 @@ pick_next_task(struct rq *rq, struct rq_flags *rf)
* blocked_on).
*/
static bool try_to_block_task(struct rq *rq, struct task_struct *p,
- unsigned long *task_state_p, bool should_block)
+ unsigned long *task_state_p)
{
unsigned long task_state = *task_state_p;
int flags = DEQUEUE_NOCLOCK;
if (signal_pending_state(task_state, p)) {
+ /* See the next comment for why this is safe. */
+ if (task_is_blocked(p))
+ clear_task_blocked_on(p, NULL);
WRITE_ONCE(p->__state, TASK_RUNNING);
*task_state_p = TASK_RUNNING;
return false;
@@ -6651,8 +6664,23 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
* should_block is false, its likely due to the task being
* blocked on a mutex, and we want to keep it on the runqueue
* to be selectable for proxy-execution.
+ *
+ * Checking against a wakeup is safe since we are here with on_rq
+ * set and the rq_lock held, forcing the wakeup onto the
+ * ttwu_runnable() path, and "p->blocked_on" cannot be cleared
+ * outside of the rq_lock when the task is preempted.
+ *
+ * XXX: This can be further optimized to block the task on
+ * PROXY_WAKING but that will require grabbing the
+ * "blocked_lock" to inspect "blocked_on" and clearing it iff
+ * PROXY_WAKING.
+ *
+ * If we block with PROXY_WAKING, proxy_needs_return() in
+ * ttwu_runnable() will be skipped and we'll hit TASK_RUNNING
+ * without clearing "blocked_on" which will make
+ * proxy_deactivate() in proxy_wake_up_donor() unhappy :-(
*/
- if (!should_block)
+ if (task_is_blocked(p))
return false;
p->sched_contributes_to_load =
@@ -6690,10 +6718,23 @@ static inline struct task_struct *proxy_resched_idle(struct rq *rq)
static bool proxy_deactivate(struct rq *rq, struct task_struct *donor)
{
unsigned long state = READ_ONCE(donor->__state);
+ bool ret;
- /* Don't deactivate if the state has been changed to TASK_RUNNING */
- if (state == TASK_RUNNING)
+ /*
+ * We should **NEVER** be TASK_RUNNING! TASK_RUNNING implies a
+ * blocked donor got a wakeup without clearing the "blocked_on"
+ * relation and has taken a path other than ttwu_runnable().
+ *
+ * Warn to catch such cases since this can affect
+ * proxy_wake_up_donor() where the wake_up_process() might not
+ * be able to serialize wakeups using ttwu state matching.
+ *
+ * Fundamentally the task is runnable so we can go ahead and
+ * indicate the blocking failed and just run the task.
+ */
+ if (WARN_ON_ONCE(state == TASK_RUNNING))
return false;
+
/*
* Because we got donor from pick_next_task(), it is *crucial*
* that we call proxy_resched_idle() before we deactivate it.
@@ -6704,7 +6745,20 @@ static bool proxy_deactivate(struct rq *rq, struct task_struct *donor)
* need to be changed from next *before* we deactivate.
*/
proxy_resched_idle(rq);
- return try_to_block_task(rq, donor, &state, true);
+ ret = try_to_block_task(rq, donor, &state);
+
+ /*
+ * If we fail to block, it must be because of a pending signal
+ * since parallel wakeup needs to grab the rq_lock before
+ * activating us at this point.
+ *
+ * The task_is_blocked() path doesn't modify the state so check
+ * against the state and warn if the blocked_on relation caused
+ * the task to fail blocking.
+ */
+ if (WARN_ON_ONCE(!ret && (state != TASK_RUNNING)))
+ WRITE_ONCE(donor->__state, TASK_RUNNING);
+ return ret;
}
/*
@@ -6774,89 +6828,33 @@ static void proxy_migrate_task(struct rq *rq, struct rq_flags *rf,
update_rq_clock(rq);
}
-static void proxy_force_return(struct rq *rq, struct rq_flags *rf,
+static bool proxy_wake_up_donor(struct rq *rq, struct rq_flags *rf,
struct task_struct *p)
{
- struct rq *this_rq, *target_rq;
- struct rq_flags this_rf;
- int cpu, wake_flag = 0;
-
+ WARN_ON_ONCE(task_current(rq, p));
lockdep_assert_rq_held(rq);
- WARN_ON(p == rq->curr);
-
- get_task_struct(p);
/*
- * We have to zap callbacks before unlocking the rq
- * as another CPU may jump in and call sched_balance_rq
- * which can trip the warning in rq_pin_lock() if we
- * leave callbacks set.
+ * Task received a signal or was woken up!
+ * It should be safe to run it now.
*/
+ if (!proxy_deactivate(rq, p))
+ return false;
+
zap_balance_callbacks(rq);
rq_unpin_lock(rq, rf);
raw_spin_rq_unlock(rq);
/*
- * We drop the rq lock, and re-grab task_rq_lock to get
- * the pi_lock (needed for select_task_rq) as well.
- */
- this_rq = task_rq_lock(p, &this_rf);
-
- /*
- * Since we let go of the rq lock, the task may have been
- * woken or migrated to another rq before we got the
- * task_rq_lock. So re-check we're on the same RQ. If
- * not, the task has already been migrated and that CPU
- * will handle any futher migrations.
+ * Everything beyond this point is serialized
+ * by the ttwu state machine.
*/
- if (this_rq != rq)
- goto err_out;
-
- /* Similarly, if we've been dequeued, someone else will wake us */
- if (!task_on_rq_queued(p))
- goto err_out;
+ wake_up_process(p);
- /*
- * Since we should only be calling here from __schedule()
- * -> find_proxy_task(), no one else should have
- * assigned current out from under us. But check and warn
- * if we see this, then bail.
- */
- if (task_current(this_rq, p) || task_on_cpu(this_rq, p)) {
- WARN_ONCE(1, "%s rq: %i current/on_cpu task %s %d on_cpu: %i\n",
- __func__, cpu_of(this_rq),
- p->comm, p->pid, p->on_cpu);
- goto err_out;
- }
-
- update_rq_clock(this_rq);
- proxy_resched_idle(this_rq);
- deactivate_task(this_rq, p, DEQUEUE_NOCLOCK);
- cpu = select_task_rq(p, p->wake_cpu, &wake_flag);
- set_task_cpu(p, cpu);
- target_rq = cpu_rq(cpu);
- clear_task_blocked_on(p, NULL);
- task_rq_unlock(this_rq, p, &this_rf);
-
- /* Drop this_rq and grab target_rq for activation */
- raw_spin_rq_lock(target_rq);
- activate_task(target_rq, p, 0);
- wakeup_preempt(target_rq, p, 0);
- put_task_struct(p);
- raw_spin_rq_unlock(target_rq);
-
- /* Finally, re-grab the origianl rq lock and return to pick-again */
- raw_spin_rq_lock(rq);
- rq_repin_lock(rq, rf);
- update_rq_clock(rq);
- return;
-
-err_out:
- task_rq_unlock(this_rq, p, &this_rf);
- put_task_struct(p);
raw_spin_rq_lock(rq);
rq_repin_lock(rq, rf);
update_rq_clock(rq);
+ return true;
}
/*
@@ -6888,7 +6886,7 @@ static void proxy_force_return(struct rq *rq, struct rq_flags *rf,
static struct task_struct *
find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
{
- enum { FOUND, DEACTIVATE_DONOR, MIGRATE, NEEDS_RETURN } action = FOUND;
+ enum { FOUND, DEACTIVATE_DONOR, MIGRATE, NEEDS_WAKEUP } action = FOUND;
struct task_struct *owner = NULL;
bool curr_in_chain = false;
int this_cpu = cpu_of(rq);
@@ -6902,14 +6900,29 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
/* Something changed in the chain, so pick again */
if (!mutex)
return NULL;
-
- /* if its PROXY_WAKING, do return migration or run if current */
+ /*
+ * If we see PROXY_WAKING, "p" is expecting a wakeup.
+ * Follow proxy_needs_return() and do a
+ * proxy_deactivate() + wake_up_process() if "p" is not
+ * current. For current, just clear "blocked_on" and
+ * run the task.
+ *
+ * For the task waking us up, ttwu_state_match() will
+ * either see TASK_RUNNING or we'll be serialized under
+ * "p->pi_lock" (and optionally at ttwu_runnable() if
+ * we drop the rq_lock with "p" still queued on us) to
+ * handle the wakeup.
+ *
+ * Since we are under the rq_lock, ttwu() cannot clear
+ * PROXY_WAKING before we do.
+ */
if (mutex == PROXY_WAKING) {
+ clear_task_blocked_on(p, PROXY_WAKING);
if (task_current(rq, p)) {
- clear_task_blocked_on(p, PROXY_WAKING);
+ WRITE_ONCE(p->__state, TASK_RUNNING);
return p;
}
- action = NEEDS_RETURN;
+ action = NEEDS_WAKEUP;
break;
}
@@ -6937,15 +6950,20 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
owner = __mutex_owner(mutex);
if (!owner) {
/*
- * If there is no owner, either clear blocked_on
- * and return p (if it is current and safe to
- * just run on this rq), or return-migrate the task.
+ * If there is no owner, clear "p->blocked_on"
+ * and run if "p" is the current.
+ *
+ * Else, this is a handoff and we'll soon be
+ * woken up by the previous owner but don't wait
+ * for that and try to do it ourselves via
+ * proxy_wake_up_donor().
*/
+ __clear_task_blocked_on(p, NULL);
if (task_current(rq, p)) {
- __clear_task_blocked_on(p, NULL);
+ WRITE_ONCE(p->__state, TASK_RUNNING);
return p;
}
- action = NEEDS_RETURN;
+ action = NEEDS_WAKEUP;
break;
}
@@ -7028,14 +7046,43 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
/* Handle actions we need to do outside of the guard() scope */
switch (action) {
case DEACTIVATE_DONOR:
+ WARN_ON_ONCE(!task_is_blocked(donor));
+ /*
+ * Since we have to get a wakeup and be queued back
+ * beyond this point, clear the blocked_on relation.
+ *
+ * XXX: This is probably controversial since the
+ * blocked donors now have their "blocked_on" relation
+ * cleared and this field is also used by
+ * CONFIG_DEBUG_MUTEXES.
+ *
+ * But only debug_mutex_{add,remove}_waiter() cares
+ * about this field when the task is running and those
+ * relations are re-established correctly as soon as the
+ * task resumes execution and before we hit those
+ * checks.
+ */
+ clear_task_blocked_on(donor, NULL);
+
+ /* If donor deactivated successfully, retry pick. */
if (proxy_deactivate(rq, donor))
return NULL;
- /* If deactivate fails, force return */
- p = donor;
- fallthrough;
- case NEEDS_RETURN:
- proxy_force_return(rq, rf, p);
- return NULL;
+
+ /* Donor received a signal or was woken! Just run it. */
+ put_prev_set_next_task(rq, rq->donor, donor);
+ rq_set_donor(rq, donor);
+ return donor;
+ case NEEDS_WAKEUP:
+ /* If p was deactivated and woken successfully, retry pick. */
+ if (proxy_wake_up_donor(rq, rf, p))
+ return NULL;
+ /*
+ * p received a signal or was woken! Restore the donor
+ * context and run the task as proxy.
+ */
+ put_prev_set_next_task(rq, rq->donor, donor);
+ rq_set_donor(rq, donor);
+ return p;
case MIGRATE:
proxy_migrate_task(rq, rf, p, owner_cpu);
return NULL;
@@ -7191,9 +7238,19 @@ static void __sched notrace __schedule(int sched_mode)
* for slection with proxy-exec (without proxy-exec
* task_is_blocked() will always be false).
*/
- try_to_block_task(rq, prev, &prev_state,
- !task_is_blocked(prev));
+ try_to_block_task(rq, prev, &prev_state);
switch_count = &prev->nvcsw;
+ } else if (task_is_blocked(prev)) {
+ /*
+ * We are not blocking anymore! Clear "p->blocked_on"
+ * since something has forced us runnable.
+ *
+ * It is safe to inspect "prev->blocked_on" here without
+ * taking "p->blocked_lock" since blocked_on can only be
+ * set to PROXY_WAKING (!= NULL) when task is preempted
+ * without taking the rq_lock.
+ */
+ clear_task_blocked_on(prev, NULL);
}
prev_not_proxied = !prev->blocked_donor;
---
I guess ignorance is bliss and all this might be a terrible idea but
I like how proxy_wake_up_donor() (aka proxy_needs_return()) ends up
being much simpler now and the whole:
  blocked_donor (!TASK_RUNNING && task_is_blocked())
        |
        v
  wakeup (clear_task_blocked_on() + ttwu())
        |
        v
  runnable (TASK_RUNNING && !task_is_blocked())
is easier to reason about IMO and should also help sched-ext since we
now have defined points to notify where the proxy starts and ends for
them to hook onto.
I think I lost the "wake_cpu" check in proxy_needs_return() somewhere
down the line but doing a block + wakeup irrespective seems to be the
right thing since in absence of proxy, !task_current() would have
blocked and received a wakeup anyways.
Sorry for dumping a bunch of radical suggestions during the holidays.
Hope we can flush out a bunch more of proxy execution (if not all of
it) this year :-)
Happy New Year!
--
Thanks and Regards,
Prateek