[RFC PATCH v15 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable

John Stultz posted 7 patches 9 months, 1 week ago
[RFC PATCH v15 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
Posted by John Stultz 9 months, 1 week ago
Add a CONFIG_SCHED_PROXY_EXEC option, along with a boot argument
sched_proxy_exec= that can be used to disable the feature at boot
time if CONFIG_SCHED_PROXY_EXEC was enabled.

Cc: Joel Fernandes <joelagnelf@nvidia.com>
Cc: Qais Yousef <qyousef@layalina.io>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Zimuzo Ezeozue <zezeozue@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Metin Kaya <Metin.Kaya@arm.com>
Cc: Xuewen Yan <xuewen.yan94@gmail.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: kernel-team@android.com
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: John Stultz <jstultz@google.com>
---
v7:
* Switch to CONFIG_SCHED_PROXY_EXEC/sched_proxy_exec= as
  suggested by Metin Kaya.
* Switch boot arg from =disable/enable to use kstrtobool(),
  which supports =yes|no|1|0|true|false|on|off, as also
  suggested by Metin Kaya, and print a message when a boot
  argument is used.
v8:
* Move CONFIG_SCHED_PROXY_EXEC under Scheduler Features as
  Suggested by Metin
* Minor rework reordering with split sched contexts patch
v12:
* Rework for selected -> donor renaming
v14:
* Depend on !PREEMPT_RT to avoid build issues for now
v15:
* Depend on EXPERT while patch series upstreaming is
  in progress.
---
 .../admin-guide/kernel-parameters.txt         |  5 ++++
 include/linux/sched.h                         | 13 +++++++++
 init/Kconfig                                  | 10 +++++++
 kernel/sched/core.c                           | 29 +++++++++++++++++++
 kernel/sched/sched.h                          | 12 ++++++++
 5 files changed, 69 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb8752b42ec85..dcc2443078d00 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6262,6 +6262,11 @@
 	sa1100ir	[NET]
 			See drivers/net/irda/sa1100_ir.c.
 
+	sched_proxy_exec= [KNL]
+			Enables or disables "proxy execution" style
+			solution to mutex-based priority inversion.
+			Format: <bool>
+
 	sched_verbose	[KNL,EARLY] Enables verbose scheduler debug messages.
 
 	schedstats=	[KNL,X86] Enable or disable scheduled statistics.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c15365a30c08..1462f2c70aefc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1636,6 +1636,19 @@ struct task_struct {
 	 */
 };
 
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static inline bool sched_proxy_exec(void)
+{
+	return static_branch_likely(&__sched_proxy_exec);
+}
+#else
+static inline bool sched_proxy_exec(void)
+{
+	return false;
+}
+#endif
+
 #define TASK_REPORT_IDLE	(TASK_REPORT + 1)
 #define TASK_REPORT_MAX		(TASK_REPORT_IDLE << 1)
 
diff --git a/init/Kconfig b/init/Kconfig
index d0d021b3fa3b3..b989ddc27444e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
 
 	  If in doubt, use the default value.
 
+config SCHED_PROXY_EXEC
+	bool "Proxy Execution"
+	default n
+	# Avoid some build failures w/ PREEMPT_RT until it can be fixed
+	depends on !PREEMPT_RT
+	depends on EXPERT
+	help
+	  This option enables proxy execution, a mechanism for mutex-owning
+	  tasks to inherit the scheduling context of higher priority waiters.
+
 endmenu
 
 #
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67189907214d3..3968c3967ec38 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static int __init setup_proxy_exec(char *str)
+{
+	bool proxy_enable;
+
+	if (kstrtobool(str, &proxy_enable)) {
+		pr_warn("Unable to parse sched_proxy_exec=\n");
+		return 0;
+	}
+
+	if (proxy_enable) {
+		pr_info("sched_proxy_exec enabled via boot arg\n");
+		static_branch_enable(&__sched_proxy_exec);
+	} else {
+		pr_info("sched_proxy_exec disabled via boot arg\n");
+		static_branch_disable(&__sched_proxy_exec);
+	}
+	return 1;
+}
+#else
+static int __init setup_proxy_exec(char *str)
+{
+	pr_warn("CONFIG_SCHED_PROXY_EXEC=n, so it cannot be enabled or disabled at boot time\n");
+	return 0;
+}
+#endif
+__setup("sched_proxy_exec=", setup_proxy_exec);
+
 #ifdef CONFIG_SCHED_DEBUG
 /*
  * Debugging: various feature bits
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8512a9fb0229..05d2122533619 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1155,10 +1155,15 @@ struct rq {
 	 */
 	unsigned int		nr_uninterruptible;
 
+#ifdef CONFIG_SCHED_PROXY_EXEC
+	struct task_struct __rcu	*donor;  /* Scheduling context */
+	struct task_struct __rcu	*curr;   /* Execution context */
+#else
 	union {
 		struct task_struct __rcu *donor; /* Scheduler context */
 		struct task_struct __rcu *curr;  /* Execution context */
 	};
+#endif
 	struct sched_dl_entity	*dl_server;
 	struct task_struct	*idle;
 	struct task_struct	*stop;
@@ -1355,10 +1360,17 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		raw_cpu_ptr(&runqueues)
 
+#ifdef CONFIG_SCHED_PROXY_EXEC
+static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
+{
+	rcu_assign_pointer(rq->donor, t);
+}
+#else
 static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
 {
 	/* Do nothing */
 }
+#endif
 
 #ifdef CONFIG_SCHED_CORE
 static inline struct cpumask *sched_group_span(struct sched_group *sg);
-- 
2.49.0.rc0.332.g42c0ae87b1-goog
Re: [RFC PATCH v15 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
Posted by Peter Zijlstra 9 months ago
On Wed, Mar 12, 2025 at 03:11:31PM -0700, John Stultz wrote:

> diff --git a/init/Kconfig b/init/Kconfig
> index d0d021b3fa3b3..b989ddc27444e 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
>  
>  	  If in doubt, use the default value.
>  
> +config SCHED_PROXY_EXEC
> +	bool "Proxy Execution"
> +	default n
> +	# Avoid some build failures w/ PREEMPT_RT until it can be fixed
> +	depends on !PREEMPT_RT
	depends on !SPM && !SCHED_CLASS_EXT

for now, right?

> +	depends on EXPERT
> +	help
> +	  This option enables proxy execution, a mechanism for mutex-owning
> +	  tasks to inherit the scheduling context of higher priority waiters.
> +
Re: [RFC PATCH v15 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
Posted by John Stultz 9 months ago
On Mon, Mar 17, 2025 at 3:33 PM Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Mar 12, 2025 at 03:11:31PM -0700, John Stultz wrote:
>
> > diff --git a/init/Kconfig b/init/Kconfig
> > index d0d021b3fa3b3..b989ddc27444e 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
> >
> >         If in doubt, use the default value.
> >
> > +config SCHED_PROXY_EXEC
> > +     bool "Proxy Execution"
> > +     default n
> > +     # Avoid some build failures w/ PREEMPT_RT until it can be fixed
> > +     depends on !PREEMPT_RT
>         depends on !SPM && !SCHED_CLASS_EXT
>
> for now, right?

Did you mean SMP there?  SMP is supported. Even with same rq proxying,
it just blocks the task and lets it sleep if the owner is on another
rq.

!SCHED_CLASS_EXT might be something to add, but mostly because the
SCHED_CLASS_EXT code probably doesn't have a way to understand the
split contexts yet.

I'll dig a bit more there.

Thanks so much for the feedback! Really appreciate it!
-john
Re: [RFC PATCH v15 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
Posted by Peter Zijlstra 9 months ago
On Mon, Mar 17, 2025 at 03:44:11PM +0100, John Stultz wrote:
> On Mon, Mar 17, 2025 at 3:33 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > On Wed, Mar 12, 2025 at 03:11:31PM -0700, John Stultz wrote:
> >
> > > diff --git a/init/Kconfig b/init/Kconfig
> > > index d0d021b3fa3b3..b989ddc27444e 100644
> > > --- a/init/Kconfig
> > > +++ b/init/Kconfig
> > > @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
> > >
> > >         If in doubt, use the default value.
> > >
> > > +config SCHED_PROXY_EXEC
> > > +     bool "Proxy Execution"
> > > +     default n
> > > +     # Avoid some build failures w/ PREEMPT_RT until it can be fixed
> > > +     depends on !PREEMPT_RT
> >         depends on !SPM && !SCHED_CLASS_EXT
> >
> > for now, right?
> 
> Did you mean SMP there?  SMP is supported. Even with same rq proxying,
> it just blocks the task and lets it sleep if the owner is on another
> rq.

Ah, yes, typing hard. And yes, I missed it would work rq local. Still
trying to untangle the proxy_deactivate() failure case :-)
Re: [RFC PATCH v15 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
Posted by Steven Rostedt 9 months, 1 week ago
On Wed, 12 Mar 2025 15:11:31 -0700
John Stultz <jstultz@google.com> wrote:
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index fb8752b42ec85..dcc2443078d00 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6262,6 +6262,11 @@
>  	sa1100ir	[NET]
>  			See drivers/net/irda/sa1100_ir.c.
>  
> +	sched_proxy_exec= [KNL]
> +			Enables or disables "proxy execution" style
> +			solution to mutex-based priority inversion.
> +			Format: <bool>

To enable, does this require: sched_proxy_exec=true

Could we just allow it to be:

		sched_proxy_exec

Also mean true? That is, both of the above would be true, but to
disable it, you would need: sched_proxy_exec=false.

> +
>  	sched_verbose	[KNL,EARLY] Enables verbose scheduler debug messages.
>  
>  	schedstats=	[KNL,X86] Enable or disable scheduled statistics.
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 9c15365a30c08..1462f2c70aefc 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1636,6 +1636,19 @@ struct task_struct {
>  	 */
>  };
>  
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
> +static inline bool sched_proxy_exec(void)
> +{
> +	return static_branch_likely(&__sched_proxy_exec);
> +}
> +#else
> +static inline bool sched_proxy_exec(void)
> +{
> +	return false;
> +}
> +#endif
> +
>  #define TASK_REPORT_IDLE	(TASK_REPORT + 1)
>  #define TASK_REPORT_MAX		(TASK_REPORT_IDLE << 1)
>  
> diff --git a/init/Kconfig b/init/Kconfig
> index d0d021b3fa3b3..b989ddc27444e 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
>  
>  	  If in doubt, use the default value.
>  
> +config SCHED_PROXY_EXEC
> +	bool "Proxy Execution"
> +	default n

Nit, you don't need "default n" because "default n" is the default ;-)

> +	# Avoid some build failures w/ PREEMPT_RT until it can be fixed
> +	depends on !PREEMPT_RT
> +	depends on EXPERT
> +	help
> +	  This option enables proxy execution, a mechanism for mutex-owning
> +	  tasks to inherit the scheduling context of higher priority waiters.
> +
>  endmenu
>  
>  #
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 67189907214d3..3968c3967ec38 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
>  
>  DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
>  
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
> +static int __init setup_proxy_exec(char *str)
> +{
> +	bool proxy_enable;
> +
> +	if (kstrtobool(str, &proxy_enable)) {

To make it work without adding =true, the above could be:

	bool proxy_enable = true;

	if (*str && kstrtobool(str, &proxy_enable)) {

> +		pr_warn("Unable to parse sched_proxy_exec=\n");
> +		return 0;
> +	}
> +
> +	if (proxy_enable) {
> +		pr_info("sched_proxy_exec enabled via boot arg\n");
> +		static_branch_enable(&__sched_proxy_exec);
> +	} else {
> +		pr_info("sched_proxy_exec disabled via boot arg\n");
> +		static_branch_disable(&__sched_proxy_exec);
> +	}
> +	return 1;
> +}

-- Steve
Re: [RFC PATCH v15 1/7] sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
Posted by John Stultz 9 months, 1 week ago
On Thu, Mar 13, 2025 at 3:09 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Wed, 12 Mar 2025 15:11:31 -0700
> John Stultz <jstultz@google.com> wrote:
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index fb8752b42ec85..dcc2443078d00 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -6262,6 +6262,11 @@
> >       sa1100ir        [NET]
> >                       See drivers/net/irda/sa1100_ir.c.
> >
> > +     sched_proxy_exec= [KNL]
> > +                     Enables or disables "proxy execution" style
> > +                     solution to mutex-based priority inversion.
> > +                     Format: <bool>
>
> To enable, does this require: sched_proxy_exec=true
>
> Could we just allow it to be:
>
>                 sched_proxy_exec
>
> Also mean true? That is, both of the above would be true, but to
> disable it, you would need: sched_proxy_exec=false.

Currently the flag defaults to true, so I'm not sure if
"sched_proxy_exec" on its own makes as much sense to me.

Though, in the android16-6.12 kernel, I have an additional change that
sets it default to false, which allows "sched_proxy_exec=true" to be
useful.  So I'm open to having the argument alone as an enablement
flag (in addition to the explicit setting), but I've personally always
found the mixed conventions there confusing, preferring the explicit
"=true" or "=1" on boot arguments.


> > diff --git a/init/Kconfig b/init/Kconfig
> > index d0d021b3fa3b3..b989ddc27444e 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
> >
> >         If in doubt, use the default value.
> >
> > +config SCHED_PROXY_EXEC
> > +     bool "Proxy Execution"
> > +     default n
>
> Nit, you don't need "default n" because "default n" is the default ;-)

Ah, thanks I'll drop that!

> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 67189907214d3..3968c3967ec38 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
> >
> >  DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
> >
> > +#ifdef CONFIG_SCHED_PROXY_EXEC
> > +DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
> > +static int __init setup_proxy_exec(char *str)
> > +{
> > +     bool proxy_enable;
> > +
> > +     if (kstrtobool(str, &proxy_enable)) {
>
> To make it work without adding =true, the above could be:
>
>         bool proxy_enable = true;
>
>         if (*str && kstrtobool(str, &proxy_enable)) {
>

Ok, I'll give this a shot.

Thanks so much for the review and feedback! Really appreciate it!
-john