[PATCH v2 05/12] sched: Add ttwu_queue controls

Peter Zijlstra posted 12 patches 3 months, 1 week ago
[PATCH v2 05/12] sched: Add ttwu_queue controls
Posted by Peter Zijlstra 3 months, 1 week ago
There are two (soon three) callers of ttwu_queue_wakelist(),
distinguish them with their own WF_ and add some knobs on.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250520101727.874587738@infradead.org
---
 kernel/sched/core.c     |   22 ++++++++++++----------
 kernel/sched/features.h |    2 ++
 kernel/sched/sched.h    |    2 ++
 3 files changed, 16 insertions(+), 10 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3888,7 +3888,7 @@ bool cpus_share_resources(int this_cpu,
 	return per_cpu(sd_share_id, this_cpu) == per_cpu(sd_share_id, that_cpu);
 }
 
-static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
+static inline bool ttwu_queue_cond(struct task_struct *p, int cpu, bool def)
 {
 	/* See SCX_OPS_ALLOW_QUEUED_WAKEUP. */
 	if (!scx_allow_ttwu_queue(p))
@@ -3929,18 +3929,19 @@ static inline bool ttwu_queue_cond(struc
 	if (!cpu_rq(cpu)->nr_running)
 		return true;
 
-	return false;
+	return def;
 }
 
 static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
 {
-	if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) {
-		sched_clock_cpu(cpu); /* Sync clocks across CPUs */
-		__ttwu_queue_wakelist(p, cpu, wake_flags);
-		return true;
-	}
+	bool def = sched_feat(TTWU_QUEUE_DEFAULT);
+
+	if (!ttwu_queue_cond(p, cpu, def))
+		return false;
 
-	return false;
+	sched_clock_cpu(cpu); /* Sync clocks across CPUs */
+	__ttwu_queue_wakelist(p, cpu, wake_flags);
+	return true;
 }
 
 static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
@@ -3948,7 +3949,7 @@ static void ttwu_queue(struct task_struc
 	struct rq *rq = cpu_rq(cpu);
 	struct rq_flags rf;
 
-	if (ttwu_queue_wakelist(p, cpu, wake_flags))
+	if (sched_feat(TTWU_QUEUE) && ttwu_queue_wakelist(p, cpu, wake_flags))
 		return;
 
 	rq_lock(rq, &rf);
@@ -4251,7 +4252,8 @@ int try_to_wake_up(struct task_struct *p
 		 * scheduling.
 		 */
 		if (smp_load_acquire(&p->on_cpu) &&
-		    ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
+		    sched_feat(TTWU_QUEUE_ON_CPU) &&
+		    ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
 			break;
 
 		cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -81,6 +81,8 @@ SCHED_FEAT(TTWU_QUEUE, false)
  */
 SCHED_FEAT(TTWU_QUEUE, true)
 #endif
+SCHED_FEAT(TTWU_QUEUE_ON_CPU, true)
+SCHED_FEAT(TTWU_QUEUE_DEFAULT, false)
 
 /*
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2279,6 +2279,8 @@ static inline int task_on_rq_migrating(s
 #define WF_CURRENT_CPU		0x40 /* Prefer to move the wakee to the current CPU. */
 #define WF_RQ_SELECTED		0x80 /* ->select_task_rq() was called */
 
+#define WF_ON_CPU		0x0100
+
 static_assert(WF_EXEC == SD_BALANCE_EXEC);
 static_assert(WF_FORK == SD_BALANCE_FORK);
 static_assert(WF_TTWU == SD_BALANCE_WAKE);
Re: [PATCH v2 05/12] sched: Add ttwu_queue controls
Posted by Mel Gorman 2 months, 3 weeks ago
On Wed, Jul 02, 2025 at 01:49:29PM +0200, Peter Zijlstra wrote:
> There are two (soon three) callers of ttwu_queue_wakelist(),
> distinguish them with their own WF_ and add some knobs on.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Link: https://lkml.kernel.org/r/20250520101727.874587738@infradead.org
> ---
>  kernel/sched/core.c     |   22 ++++++++++++----------
>  kernel/sched/features.h |    2 ++
>  kernel/sched/sched.h    |    2 ++
>  3 files changed, 16 insertions(+), 10 deletions(-)
> 
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3888,7 +3888,7 @@ bool cpus_share_resources(int this_cpu,
>  	return per_cpu(sd_share_id, this_cpu) == per_cpu(sd_share_id, that_cpu);
>  }
>  
> -static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
> +static inline bool ttwu_queue_cond(struct task_struct *p, int cpu, bool def)
>  {
>  	/* See SCX_OPS_ALLOW_QUEUED_WAKEUP. */
>  	if (!scx_allow_ttwu_queue(p))
> @@ -3929,18 +3929,19 @@ static inline bool ttwu_queue_cond(struc
>  	if (!cpu_rq(cpu)->nr_running)
>  		return true;
>  
> -	return false;
> +	return def;
>  }
>  
>  static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
>  {
> -	if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) {
> -		sched_clock_cpu(cpu); /* Sync clocks across CPUs */
> -		__ttwu_queue_wakelist(p, cpu, wake_flags);
> -		return true;
> -	}
> +	bool def = sched_feat(TTWU_QUEUE_DEFAULT);
> +
> +	if (!ttwu_queue_cond(p, cpu, def))
> +		return false;
>  
> -	return false;
> +	sched_clock_cpu(cpu); /* Sync clocks across CPUs */
> +	__ttwu_queue_wakelist(p, cpu, wake_flags);
> +	return true;
>  }
>  

I get that you're moving the feature checks into the callers but it's
unclear what the intent behind TTWU_QUEUE_DEFAULT is. It's somewhat
preserving existing behaviour and is probably preparation for a later
patch but it's less clear why it's necessary or what changing it would
reveal while debugging.

>  static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
> @@ -3948,7 +3949,7 @@ static void ttwu_queue(struct task_struc
>  	struct rq *rq = cpu_rq(cpu);
>  	struct rq_flags rf;
>  
> -	if (ttwu_queue_wakelist(p, cpu, wake_flags))
> +	if (sched_feat(TTWU_QUEUE) && ttwu_queue_wakelist(p, cpu, wake_flags))
>  		return;
>  
>  	rq_lock(rq, &rf);
> @@ -4251,7 +4252,8 @@ int try_to_wake_up(struct task_struct *p
>  		 * scheduling.
>  		 */
>  		if (smp_load_acquire(&p->on_cpu) &&
> -		    ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
> +		    sched_feat(TTWU_QUEUE_ON_CPU) &&
> +		    ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
>  			break;
>  
>  		cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -81,6 +81,8 @@ SCHED_FEAT(TTWU_QUEUE, false)
>   */
>  SCHED_FEAT(TTWU_QUEUE, true)
>  #endif
> +SCHED_FEAT(TTWU_QUEUE_ON_CPU, true)
> +SCHED_FEAT(TTWU_QUEUE_DEFAULT, false)
>  
>  /*
>   * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2279,6 +2279,8 @@ static inline int task_on_rq_migrating(s
>  #define WF_CURRENT_CPU		0x40 /* Prefer to move the wakee to the current CPU. */
>  #define WF_RQ_SELECTED		0x80 /* ->select_task_rq() was called */
>  
> +#define WF_ON_CPU		0x0100
> +
>  static_assert(WF_EXEC == SD_BALANCE_EXEC);
>  static_assert(WF_FORK == SD_BALANCE_FORK);
>  static_assert(WF_TTWU == SD_BALANCE_WAKE);
> 

-- 
Mel Gorman
SUSE Labs
Re: [PATCH v2 05/12] sched: Add ttwu_queue controls
Posted by Vincent Guittot 2 months, 4 weeks ago
On Wed, 2 Jul 2025 at 14:12, Peter Zijlstra <peterz@infradead.org> wrote:
>
> There are two (soon three) callers of ttwu_queue_wakelist(),
> distinguish them with their own WF_ and add some knobs on.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Link: https://lkml.kernel.org/r/20250520101727.874587738@infradead.org
> ---
>  kernel/sched/core.c     |   22 ++++++++++++----------
>  kernel/sched/features.h |    2 ++
>  kernel/sched/sched.h    |    2 ++
>  3 files changed, 16 insertions(+), 10 deletions(-)
>
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3888,7 +3888,7 @@ bool cpus_share_resources(int this_cpu,
>         return per_cpu(sd_share_id, this_cpu) == per_cpu(sd_share_id, that_cpu);
>  }
>
> -static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
> +static inline bool ttwu_queue_cond(struct task_struct *p, int cpu, bool def)
>  {
>         /* See SCX_OPS_ALLOW_QUEUED_WAKEUP. */
>         if (!scx_allow_ttwu_queue(p))
> @@ -3929,18 +3929,19 @@ static inline bool ttwu_queue_cond(struc
>         if (!cpu_rq(cpu)->nr_running)
>                 return true;
>
> -       return false;
> +       return def;
>  }
>
>  static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
>  {
> -       if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) {
> -               sched_clock_cpu(cpu); /* Sync clocks across CPUs */
> -               __ttwu_queue_wakelist(p, cpu, wake_flags);
> -               return true;
> -       }
> +       bool def = sched_feat(TTWU_QUEUE_DEFAULT);

I'm always confused by this sched feature name because
sched_feat(TTWU_QUEUE_DEFAULT) must be false in order to have the
current (default ?) behaviour
Or you mean queue by default in the wakelist which is disable to keep
current behavior

> +
> +       if (!ttwu_queue_cond(p, cpu, def))
> +               return false;
>
> -       return false;
> +       sched_clock_cpu(cpu); /* Sync clocks across CPUs */
> +       __ttwu_queue_wakelist(p, cpu, wake_flags);
> +       return true;
>  }
>
>  static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
> @@ -3948,7 +3949,7 @@ static void ttwu_queue(struct task_struc
>         struct rq *rq = cpu_rq(cpu);
>         struct rq_flags rf;
>
> -       if (ttwu_queue_wakelist(p, cpu, wake_flags))
> +       if (sched_feat(TTWU_QUEUE) && ttwu_queue_wakelist(p, cpu, wake_flags))
>                 return;
>
>         rq_lock(rq, &rf);
> @@ -4251,7 +4252,8 @@ int try_to_wake_up(struct task_struct *p
>                  * scheduling.
>                  */
>                 if (smp_load_acquire(&p->on_cpu) &&
> -                   ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
> +                   sched_feat(TTWU_QUEUE_ON_CPU) &&
> +                   ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
>                         break;
>
>                 cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -81,6 +81,8 @@ SCHED_FEAT(TTWU_QUEUE, false)
>   */
>  SCHED_FEAT(TTWU_QUEUE, true)
>  #endif
> +SCHED_FEAT(TTWU_QUEUE_ON_CPU, true)
> +SCHED_FEAT(TTWU_QUEUE_DEFAULT, false)
>
>  /*
>   * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2279,6 +2279,8 @@ static inline int task_on_rq_migrating(s
>  #define WF_CURRENT_CPU         0x40 /* Prefer to move the wakee to the current CPU. */
>  #define WF_RQ_SELECTED         0x80 /* ->select_task_rq() was called */
>
> +#define WF_ON_CPU              0x0100
> +
>  static_assert(WF_EXEC == SD_BALANCE_EXEC);
>  static_assert(WF_FORK == SD_BALANCE_FORK);
>  static_assert(WF_TTWU == SD_BALANCE_WAKE);
>
>