There are two (soon three) callers of ttwu_queue_wakelist(),
distinguish them with their own WF_ and add some knobs on.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250520101727.874587738@infradead.org
---
kernel/sched/core.c | 22 ++++++++++++----------
kernel/sched/features.h | 2 ++
kernel/sched/sched.h | 2 ++
3 files changed, 16 insertions(+), 10 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3888,7 +3888,7 @@ bool cpus_share_resources(int this_cpu,
return per_cpu(sd_share_id, this_cpu) == per_cpu(sd_share_id, that_cpu);
}
-static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
+static inline bool ttwu_queue_cond(struct task_struct *p, int cpu, bool def)
{
/* See SCX_OPS_ALLOW_QUEUED_WAKEUP. */
if (!scx_allow_ttwu_queue(p))
@@ -3929,18 +3929,19 @@ static inline bool ttwu_queue_cond(struc
if (!cpu_rq(cpu)->nr_running)
return true;
- return false;
+ return def;
}
static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
{
- if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) {
- sched_clock_cpu(cpu); /* Sync clocks across CPUs */
- __ttwu_queue_wakelist(p, cpu, wake_flags);
- return true;
- }
+ bool def = sched_feat(TTWU_QUEUE_DEFAULT);
+
+ if (!ttwu_queue_cond(p, cpu, def))
+ return false;
- return false;
+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */
+ __ttwu_queue_wakelist(p, cpu, wake_flags);
+ return true;
}
static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
@@ -3948,7 +3949,7 @@ static void ttwu_queue(struct task_struc
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
- if (ttwu_queue_wakelist(p, cpu, wake_flags))
+ if (sched_feat(TTWU_QUEUE) && ttwu_queue_wakelist(p, cpu, wake_flags))
return;
rq_lock(rq, &rf);
@@ -4251,7 +4252,8 @@ int try_to_wake_up(struct task_struct *p
* scheduling.
*/
if (smp_load_acquire(&p->on_cpu) &&
- ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
+ sched_feat(TTWU_QUEUE_ON_CPU) &&
+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
break;
cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -81,6 +81,8 @@ SCHED_FEAT(TTWU_QUEUE, false)
*/
SCHED_FEAT(TTWU_QUEUE, true)
#endif
+SCHED_FEAT(TTWU_QUEUE_ON_CPU, true)
+SCHED_FEAT(TTWU_QUEUE_DEFAULT, false)
/*
* When doing wakeups, attempt to limit superfluous scans of the LLC domain.
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2279,6 +2279,8 @@ static inline int task_on_rq_migrating(s
#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */
#define WF_RQ_SELECTED 0x80 /* ->select_task_rq() was called */
+#define WF_ON_CPU 0x0100
+
static_assert(WF_EXEC == SD_BALANCE_EXEC);
static_assert(WF_FORK == SD_BALANCE_FORK);
static_assert(WF_TTWU == SD_BALANCE_WAKE);
On Wed, Jul 02, 2025 at 01:49:29PM +0200, Peter Zijlstra wrote: > There are two (soon three) callers of ttwu_queue_wakelist(), > distinguish them with their own WF_ and add some knobs on. > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > Link: https://lkml.kernel.org/r/20250520101727.874587738@infradead.org > --- > kernel/sched/core.c | 22 ++++++++++++---------- > kernel/sched/features.h | 2 ++ > kernel/sched/sched.h | 2 ++ > 3 files changed, 16 insertions(+), 10 deletions(-) > > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -3888,7 +3888,7 @@ bool cpus_share_resources(int this_cpu, > return per_cpu(sd_share_id, this_cpu) == per_cpu(sd_share_id, that_cpu); > } > > -static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) > +static inline bool ttwu_queue_cond(struct task_struct *p, int cpu, bool def) > { > /* See SCX_OPS_ALLOW_QUEUED_WAKEUP. */ > if (!scx_allow_ttwu_queue(p)) > @@ -3929,18 +3929,19 @@ static inline bool ttwu_queue_cond(struc > if (!cpu_rq(cpu)->nr_running) > return true; > > - return false; > + return def; > } > > static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) > { > - if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) { > - sched_clock_cpu(cpu); /* Sync clocks across CPUs */ > - __ttwu_queue_wakelist(p, cpu, wake_flags); > - return true; > - } > + bool def = sched_feat(TTWU_QUEUE_DEFAULT); > + > + if (!ttwu_queue_cond(p, cpu, def)) > + return false; > > - return false; > + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ > + __ttwu_queue_wakelist(p, cpu, wake_flags); > + return true; > } > I get that you're moving the feature checks into the callers but it's unclear what the intent behind TTWU_QUEUE_DEFAULT is. It's somewhat preserving existing behaviour and is probably preparation for a later patch but it's less clear why it's necessary or what changing it would reveal while debugging. 
> static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) > @@ -3948,7 +3949,7 @@ static void ttwu_queue(struct task_struc > struct rq *rq = cpu_rq(cpu); > struct rq_flags rf; > > - if (ttwu_queue_wakelist(p, cpu, wake_flags)) > + if (sched_feat(TTWU_QUEUE) && ttwu_queue_wakelist(p, cpu, wake_flags)) > return; > > rq_lock(rq, &rf); > @@ -4251,7 +4252,8 @@ int try_to_wake_up(struct task_struct *p > * scheduling. > */ > if (smp_load_acquire(&p->on_cpu) && > - ttwu_queue_wakelist(p, task_cpu(p), wake_flags)) > + sched_feat(TTWU_QUEUE_ON_CPU) && > + ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) > break; > > cpu = select_task_rq(p, p->wake_cpu, &wake_flags); > --- a/kernel/sched/features.h > +++ b/kernel/sched/features.h > @@ -81,6 +81,8 @@ SCHED_FEAT(TTWU_QUEUE, false) > */ > SCHED_FEAT(TTWU_QUEUE, true) > #endif > +SCHED_FEAT(TTWU_QUEUE_ON_CPU, true) > +SCHED_FEAT(TTWU_QUEUE_DEFAULT, false) > > /* > * When doing wakeups, attempt to limit superfluous scans of the LLC domain. > --- a/kernel/sched/sched.h > +++ b/kernel/sched/sched.h > @@ -2279,6 +2279,8 @@ static inline int task_on_rq_migrating(s > #define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */ > #define WF_RQ_SELECTED 0x80 /* ->select_task_rq() was called */ > > +#define WF_ON_CPU 0x0100 > + > static_assert(WF_EXEC == SD_BALANCE_EXEC); > static_assert(WF_FORK == SD_BALANCE_FORK); > static_assert(WF_TTWU == SD_BALANCE_WAKE); > -- Mel Gorman SUSE Labs
On Wed, 2 Jul 2025 at 14:12, Peter Zijlstra <peterz@infradead.org> wrote: > > There are two (soon three) callers of ttwu_queue_wakelist(), > distinguish them with their own WF_ and add some knobs on. > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > Link: https://lkml.kernel.org/r/20250520101727.874587738@infradead.org > --- > kernel/sched/core.c | 22 ++++++++++++---------- > kernel/sched/features.h | 2 ++ > kernel/sched/sched.h | 2 ++ > 3 files changed, 16 insertions(+), 10 deletions(-) > > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -3888,7 +3888,7 @@ bool cpus_share_resources(int this_cpu, > return per_cpu(sd_share_id, this_cpu) == per_cpu(sd_share_id, that_cpu); > } > > -static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) > +static inline bool ttwu_queue_cond(struct task_struct *p, int cpu, bool def) > { > /* See SCX_OPS_ALLOW_QUEUED_WAKEUP. */ > if (!scx_allow_ttwu_queue(p)) > @@ -3929,18 +3929,19 @@ static inline bool ttwu_queue_cond(struc > if (!cpu_rq(cpu)->nr_running) > return true; > > - return false; > + return def; > } > > static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) > { > - if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) { > - sched_clock_cpu(cpu); /* Sync clocks across CPUs */ > - __ttwu_queue_wakelist(p, cpu, wake_flags); > - return true; > - } > + bool def = sched_feat(TTWU_QUEUE_DEFAULT); I'm always confused by this sched feature name because sched_feat(TTWU_QUEUE_DEFAULT) must be false in order to have the current (default ?) 
behaviour? Or do you mean queuing by default in the wakelist, which is disabled to keep the current behavior? > + > + if (!ttwu_queue_cond(p, cpu, def)) > + return false; > > - return false; > + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ > + __ttwu_queue_wakelist(p, cpu, wake_flags); > + return true; > } > > static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) > @@ -3948,7 +3949,7 @@ static void ttwu_queue(struct task_struc > struct rq *rq = cpu_rq(cpu); > struct rq_flags rf; > > - if (ttwu_queue_wakelist(p, cpu, wake_flags)) > + if (sched_feat(TTWU_QUEUE) && ttwu_queue_wakelist(p, cpu, wake_flags)) > return; > > rq_lock(rq, &rf); > @@ -4251,7 +4252,8 @@ int try_to_wake_up(struct task_struct *p > * scheduling. > */ > if (smp_load_acquire(&p->on_cpu) && > - ttwu_queue_wakelist(p, task_cpu(p), wake_flags)) > + sched_feat(TTWU_QUEUE_ON_CPU) && > + ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) > break; > > cpu = select_task_rq(p, p->wake_cpu, &wake_flags); > --- a/kernel/sched/features.h > +++ b/kernel/sched/features.h > @@ -81,6 +81,8 @@ SCHED_FEAT(TTWU_QUEUE, false) > */ > SCHED_FEAT(TTWU_QUEUE, true) > #endif > +SCHED_FEAT(TTWU_QUEUE_ON_CPU, true) > +SCHED_FEAT(TTWU_QUEUE_DEFAULT, false) > > /* > * When doing wakeups, attempt to limit superfluous scans of the LLC domain. > --- a/kernel/sched/sched.h > +++ b/kernel/sched/sched.h > @@ -2279,6 +2279,8 @@ static inline int task_on_rq_migrating(s > #define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */ > #define WF_RQ_SELECTED 0x80 /* ->select_task_rq() was called */ > > +#define WF_ON_CPU 0x0100 > + > static_assert(WF_EXEC == SD_BALANCE_EXEC); > static_assert(WF_FORK == SD_BALANCE_FORK); > static_assert(WF_TTWU == SD_BALANCE_WAKE); > >
© 2016 - 2025 Red Hat, Inc.