The delayed dequeue feature keeps a sleeping task enqueued until its
lag has elapsed. As a result, the task also stays visible in
rq->nr_running. So in wake_affine_idle(), we should use the real
number of running tasks in the rq when deciding whether to place the
woken task on the current CPU.

To that end, add a helper function that returns the number of delayed
tasks.

Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue")
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
---
V2:
- add helper function (Vincent)
---
kernel/sched/fair.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1c0ef435a7aa..a354f29c4f6f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7181,6 +7181,11 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	return true;
 }
 
+static inline unsigned int cfs_h_nr_delayed(struct rq *rq)
+{
+	return (rq->cfs.h_nr_queued - rq->cfs.h_nr_runnable);
+}
+
 #ifdef CONFIG_SMP
 
 /* Working cpumask for: sched_balance_rq(), sched_balance_newidle(). */
@@ -7342,8 +7347,12 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 	if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
 		return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
 
-	if (sync && cpu_rq(this_cpu)->nr_running == 1)
-		return this_cpu;
+	if (sync) {
+		struct rq *rq = cpu_rq(this_cpu);
+
+		if ((rq->nr_running - cfs_h_nr_delayed(rq)) == 1)
+			return this_cpu;
+	}
 
 	if (available_idle_cpu(prev_cpu))
 		return prev_cpu;
--
2.25.1
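
[Editorial aside: the arithmetic above can be sanity-checked in user
space. The snippet below is a hypothetical stand-in for the kernel's
runqueue counters, not kernel code; it models one running waker plus
one sched_delayed task that is still enqueued, the case the old
nr_running check gets wrong.]

#include <stdio.h>

/*
 * Hypothetical snapshot of one runqueue's counters, mirroring the
 * kernel fields the patch uses: rq->nr_running, rq->cfs.h_nr_queued
 * and rq->cfs.h_nr_runnable.
 */
struct rq_snapshot {
	unsigned int nr_running;	/* all queued tasks, incl. delayed */
	unsigned int h_nr_queued;	/* queued CFS tasks, incl. delayed */
	unsigned int h_nr_runnable;	/* CFS tasks actually runnable */
};

/* Same arithmetic as the new cfs_h_nr_delayed() helper. */
static unsigned int cfs_h_nr_delayed(const struct rq_snapshot *rq)
{
	return rq->h_nr_queued - rq->h_nr_runnable;
}

int main(void)
{
	/*
	 * The waker is running and one sched_delayed task is still
	 * enqueued: nr_running is 2, but only one task really runs.
	 */
	struct rq_snapshot rq = {
		.nr_running	= 2,
		.h_nr_queued	= 2,
		.h_nr_runnable	= 1,
	};

	/* Old check: fails, so the sync wakeup is not placed here. */
	printf("old check passes: %d\n", rq.nr_running == 1);

	/* New check: subtracts the delayed task and passes. */
	printf("new check passes: %d\n",
	       rq.nr_running - cfs_h_nr_delayed(&rq) == 1);
	return 0;
}
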
On Mon, 3 Mar 2025 at 11:56, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
>
> The delayed dequeue feature keeps a sleeping task enqueued until its
> lag has elapsed. As a result, the task also stays visible in
> rq->nr_running.
> [...]
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

Hi Vincent,

Sorry to ask, but may I know whether this patch can be merged into
mainline?

Thanks!

On Wed, Mar 19, 2025 at 5:35 PM Vincent Guittot
<vincent.guittot@linaro.org> wrote:
>
> On Mon, 3 Mar 2025 at 11:56, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
> >
> > [...]
>
> Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

Hi Xuewen,
On 3/3/25 6:52 PM, Xuewen Yan wrote:
> The delayed dequeue feature keeps a sleeping task enqueued until its
> lag has elapsed. [...]
>
> Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue")
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>

We noticed that your patch fixes a regression introduced by
DELAY_DEQUEUE in lmbench lat_ctx. Here is the performance data from
running `./lat_ctx -P $(nproc) 96` on an Intel SPR server with
192 CPUs (smaller is better):

  DELAY_DEQUEUE                9.71
  NO_DELAY_DEQUEUE             4.02
  DELAY_DEQUEUE + this_patch   3.86
Also on an aarch64 server with 128 CPUs:

  DELAY_DEQUEUE               14.82
  NO_DELAY_DEQUEUE             5.62
  DELAY_DEQUEUE + this_patch   4.66
We first found the lmbench lat_ctx regression when enabling
DELAY_DEQUEUE: cpu-migrations increased more than 100-fold, together
with higher nr_wakeups_migrate, nr_wakeups_remote, nr_wakeups_affine
and nr_wakeups_affine_attempts, and lower nr_wakeups_local.

We think this benchmark prefers the waker and wakee staying on the
same CPU, but WA_IDLE failed to achieve that due to sched_delay
noise, so your patch does fix it.

Feel free to add
Reviewed-and-tested-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Thanks.
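
[Editorial aside: for anyone wanting to reproduce numbers like these,
below is a minimal sketch of how the feature can be flipped between
runs. The debugfs path is an assumption: it requires root,
CONFIG_SCHED_DEBUG and a mounted debugfs, and on older kernels the
file is /sys/kernel/debug/sched_features instead.]

#include <stdio.h>
#include <stdlib.h>

/* Assumed debugfs location of the scheduler feature switches. */
#define FEATURES "/sys/kernel/debug/sched/features"

/* Write a feature name (or its NO_ variant) to the features file. */
static int set_feature(const char *name)
{
	FILE *f = fopen(FEATURES, "w");

	if (!f) {
		perror(FEATURES);
		return -1;
	}
	fputs(name, f);
	return fclose(f);
}

int main(void)
{
	/* Baseline run with the feature disabled. */
	if (set_feature("NO_DELAY_DEQUEUE"))
		return 1;
	system("./lat_ctx -P $(nproc) 96");

	/* Comparison run with the feature enabled again. */
	if (set_feature("DELAY_DEQUEUE"))
		return 1;
	system("./lat_ctx -P $(nproc) 96");
	return 0;
}
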
The following commit has been merged into the sched/core branch of tip:
Commit-ID: aa3ee4f0b7541382c9f6f43f7408d73a5d4f4042
Gitweb: https://git.kernel.org/tip/aa3ee4f0b7541382c9f6f43f7408d73a5d4f4042
Author: Xuewen Yan <xuewen.yan@unisoc.com>
AuthorDate: Mon, 03 Mar 2025 18:52:39 +08:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 21 May 2025 13:57:37 +02:00
sched/fair: Fixup wake_up_sync() vs DELAYED_DEQUEUE
The delayed dequeue feature keeps a sleeping task enqueued until its
lag has elapsed. As a result, the task also stays visible in
rq->nr_running. So in wake_affine_idle(), we should use the real
number of running tasks in the rq when deciding whether to place the
woken task on the current CPU.

To that end, add a helper function that returns the number of delayed
tasks.

Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue")
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
Reviewed-and-tested-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20250303105241.17251-2-xuewen.yan@unisoc.com
---
kernel/sched/fair.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eb5a257..b00f167 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7193,6 +7193,11 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	return true;
 }
 
+static inline unsigned int cfs_h_nr_delayed(struct rq *rq)
+{
+	return (rq->cfs.h_nr_queued - rq->cfs.h_nr_runnable);
+}
+
 #ifdef CONFIG_SMP
 
 /* Working cpumask for: sched_balance_rq(), sched_balance_newidle(). */
@@ -7354,8 +7359,12 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 	if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
 		return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
 
-	if (sync && cpu_rq(this_cpu)->nr_running == 1)
-		return this_cpu;
+	if (sync) {
+		struct rq *rq = cpu_rq(this_cpu);
+
+		if ((rq->nr_running - cfs_h_nr_delayed(rq)) == 1)
+			return this_cpu;
+	}
 
 	if (available_idle_cpu(prev_cpu))
 		return prev_cpu;
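
[Editorial aside: the per-task wakeup counters cited in the lat_ctx
analysis above can be inspected from user space. This is a rough
sketch: it assumes CONFIG_SCHEDSTATS with schedstats enabled
(echo 1 > /proc/sys/kernel/sched_schedstats), and since the field
prefix in /proc/<pid>/sched varies across kernel versions, the match
is deliberately loose.]

#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	char path[64], line[256];
	FILE *f;

	/* Default to the calling task if no PID is given. */
	snprintf(path, sizeof(path), "/proc/%s/sched",
		 argc > 1 ? argv[1] : "self");
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	/* Print every nr_wakeups_* statistic line for this task. */
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "nr_wakeups"))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}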