ttwu_runnable() is used as a fast wakeup path when the wakee task
is either running on a CPU or runnable on an RQ; in both cases we can
just set its state to TASK_RUNNING to prevent it from sleeping.
If the wakee task is running on_cpu, we don't need to call
update_rq_clock() or check_preempt_curr().

But if the wakee task is on_rq && !on_cpu (e.g. an IRQ hit before
the task got to schedule() and the task has been preempted), we should
call check_preempt_curr() to see if it can preempt the currently
running task.
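
To make the on_rq && !on_cpu window concrete, recall the classic
wait-loop pattern (as in the comment above ttwu_runnable() in
kernel/sched/core.c; CONDITION stands for the actual wait condition):

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (CONDITION)
			break;

		schedule();
	}
	__set_current_state(TASK_RUNNING);

If an IRQ arrives after set_current_state() but before schedule() and
the task gets preempted on IRQ return, it is still queued (on_rq) but
no longer on the CPU (!on_cpu); that is exactly the case where
check_preempt_curr() is still needed.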
Reorganize ttwu_do_wakeup() and ttwu_do_activate() so that
ttwu_do_wakeup() only marks the task runnable; it can then be used
in the ttwu_runnable() and try_to_wake_up() fast paths.
This also removes the class->task_woken() callback from ttwu_runnable(),
which wasn't required per the RT/DL implementations: any required push
operation would have been queued during class->set_next_task() when p
got preempted.
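
For reference, a simplified sketch of the RT case (paraphrased from
kernel/sched/rt.c; details elided and the exact code differs across
kernel versions):

	static inline void
	set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
	{
		p->se.exec_start = rq_clock_task(rq);

		/* The running task is never eligible for pushing. */
		dequeue_pushable_task(rq, p);

		if (!first)
			return;

		/* Queue a balance callback that pushes overloaded RT
		 * tasks to other CPUs once the rq lock is dropped. */
		rt_queue_push_tasks(rq);
	}

So by the time ttwu_runnable() finds p on_rq && !on_cpu, any push work
for the preempting RT/DL task was already queued when p was switched
out.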
ttwu_runnable() also loses the update to rq->idle_stamp, as by definition
the rq cannot be idle in this scenario.
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
---
v4:
- s/This patch reorg/Reorganize/ per Bagas Sanjaya. Thanks!
v3:
- Improve the changelog per Valentin Schneider. Thanks!
v2:
- keep check_preempt_curr() for the on_rq && !on_cpu case in ttwu_runnable(),
  per Valentin Schneider.
- reorganize the ttwu_do_wakeup() and ttwu_do_activate() code, so ttwu_do_wakeup()
  can be reused in ttwu_runnable(), per Peter Zijlstra.
- reuse ttwu_do_wakeup() in the try_to_wake_up() (p == current) fast path too,
  so ttwu_do_wakeup() becomes the only place we mark a task runnable.
---
kernel/sched/core.c | 73 ++++++++++++++++++++++++---------------------
1 file changed, 39 insertions(+), 34 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 314c2c0219d9..d8216485b0ad 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3623,14 +3623,39 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
}
/*
- * Mark the task runnable and perform wakeup-preemption.
+ * Mark the task runnable.
*/
-static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
- struct rq_flags *rf)
+static inline void ttwu_do_wakeup(struct task_struct *p)
{
- check_preempt_curr(rq, p, wake_flags);
WRITE_ONCE(p->__state, TASK_RUNNING);
trace_sched_wakeup(p);
+}
+
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
+ struct rq_flags *rf)
+{
+ int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
+
+ lockdep_assert_rq_held(rq);
+
+ if (p->sched_contributes_to_load)
+ rq->nr_uninterruptible--;
+
+#ifdef CONFIG_SMP
+ if (wake_flags & WF_MIGRATED)
+ en_flags |= ENQUEUE_MIGRATED;
+ else
+#endif
+ if (p->in_iowait) {
+ delayacct_blkio_end(p);
+ atomic_dec(&task_rq(p)->nr_iowait);
+ }
+
+ activate_task(rq, p, en_flags);
+ check_preempt_curr(rq, p, wake_flags);
+
+ ttwu_do_wakeup(p);
#ifdef CONFIG_SMP
if (p->sched_class->task_woken) {
@@ -3660,31 +3685,6 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
#endif
}
-static void
-ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
- struct rq_flags *rf)
-{
- int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
-
- lockdep_assert_rq_held(rq);
-
- if (p->sched_contributes_to_load)
- rq->nr_uninterruptible--;
-
-#ifdef CONFIG_SMP
- if (wake_flags & WF_MIGRATED)
- en_flags |= ENQUEUE_MIGRATED;
- else
-#endif
- if (p->in_iowait) {
- delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
- }
-
- activate_task(rq, p, en_flags);
- ttwu_do_wakeup(rq, p, wake_flags, rf);
-}
-
/*
* Consider @p being inside a wait loop:
*
@@ -3718,9 +3718,15 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
rq = __task_rq_lock(p, &rf);
if (task_on_rq_queued(p)) {
- /* check_preempt_curr() may use rq clock */
- update_rq_clock(rq);
- ttwu_do_wakeup(rq, p, wake_flags, &rf);
+ if (!task_on_cpu(rq, p)) {
+ /*
+ * When on_rq && !on_cpu the task is preempted, see if
+ * it should preempt whatever is current there now.
+ */
+ update_rq_clock(rq);
+ check_preempt_curr(rq, p, wake_flags);
+ }
+ ttwu_do_wakeup(p);
ret = 1;
}
__task_rq_unlock(rq, &rf);
@@ -4086,8 +4092,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
goto out;
trace_sched_waking(p);
- WRITE_ONCE(p->__state, TASK_RUNNING);
- trace_sched_wakeup(p);
+ ttwu_do_wakeup(p);
goto out;
}
--
2.37.2
On 2022-12-02 at 16:06:44 +0800, Chengming Zhou wrote:
> ttwu_runnable() is used as a fast wakeup path when the wakee task
> is either running on a CPU or runnable on an RQ; in both cases we can
> just set its state to TASK_RUNNING to prevent it from sleeping.
>
> [...]

Just wonder if we could split the reorganization and optimization into
two patches, so we can track the code change a little easier in the
future?

thanks,
Chenyu
On 2022/12/3 15:55, Chen Yu wrote:
> On 2022-12-02 at 16:06:44 +0800, Chengming Zhou wrote:
>> [...]
>
> Just wonder if we could split the reorganization and optimization into
> two patches, so we can track the code change a little easier in the
> future?

Thanks for your suggestion. I'll try to split it into two patches later
and resend.

Thanks.