Add a CONFIG_SCHED_PROXY_EXEC option, along with a boot argument
sched_proxy_exec= that can be used to disable the feature at boot
time if CONFIG_SCHED_PROXY_EXEC was enabled.
Cc: Joel Fernandes <joelagnelf@nvidia.com>
Cc: Qais Yousef <qyousef@layalina.io>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Zimuzo Ezeozue <zezeozue@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Metin Kaya <Metin.Kaya@arm.com>
Cc: Xuewen Yan <xuewen.yan94@gmail.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: kernel-team@android.com
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: John Stultz <jstultz@google.com>
---
v7:
* Switch to CONFIG_SCHED_PROXY_EXEC/sched_proxy_exec= as
suggested by Metin Kaya.
* Switch boot arg from =disable/enable to use kstrtobool(),
which supports =yes|no|1|0|true|false|on|off, as also
suggested by Metin Kaya, and print a message when a boot
argument is used.
v8:
* Move CONFIG_SCHED_PROXY_EXEC under Scheduler Features as
suggested by Metin
* Minor rework reordering with split sched contexts patch
v12:
* Rework for selected -> donor renaming
v14:
* Depend on !PREEMPT_RT to avoid build issues for now
v15:
* Depend on EXPERT while patch series upstreaming is
in progress.
---
.../admin-guide/kernel-parameters.txt | 5 ++++
include/linux/sched.h | 13 +++++++++
init/Kconfig | 10 +++++++
kernel/sched/core.c | 29 +++++++++++++++++++
kernel/sched/sched.h | 12 ++++++++
5 files changed, 69 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb8752b42ec85..dcc2443078d00 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6262,6 +6262,11 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
+ sched_proxy_exec= [KNL]
+ Enables or disables "proxy execution" style
+ solution to mutex-based priority inversion.
+ Format: <bool>
+
sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c15365a30c08..1462f2c70aefc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1636,6 +1636,19 @@ struct task_struct {
*/
};
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static inline bool sched_proxy_exec(void)
+{
+ return static_branch_likely(&__sched_proxy_exec);
+}
+#else
+static inline bool sched_proxy_exec(void)
+{
+ return false;
+}
+#endif
+
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
diff --git a/init/Kconfig b/init/Kconfig
index d0d021b3fa3b3..b989ddc27444e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
If in doubt, use the default value.
+config SCHED_PROXY_EXEC
+ bool "Proxy Execution"
+ default n
+ # Avoid some build failures w/ PREEMPT_RT until it can be fixed
+ depends on !PREEMPT_RT
+ depends on EXPERT
+ help
+ This option enables proxy execution, a mechanism for mutex-owning
+ tasks to inherit the scheduling context of higher priority waiters.
+
endmenu
#
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67189907214d3..3968c3967ec38 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static int __init setup_proxy_exec(char *str)
+{
+ bool proxy_enable;
+
+ if (kstrtobool(str, &proxy_enable)) {
+ pr_warn("Unable to parse sched_proxy_exec=\n");
+ return 0;
+ }
+
+ if (proxy_enable) {
+ pr_info("sched_proxy_exec enabled via boot arg\n");
+ static_branch_enable(&__sched_proxy_exec);
+ } else {
+ pr_info("sched_proxy_exec disabled via boot arg\n");
+ static_branch_disable(&__sched_proxy_exec);
+ }
+ return 1;
+}
+#else
+static int __init setup_proxy_exec(char *str)
+{
+ pr_warn("CONFIG_SCHED_PROXY_EXEC=n, so it cannot be enabled or disabled at boot time\n");
+ return 0;
+}
+#endif
+__setup("sched_proxy_exec=", setup_proxy_exec);
+
#ifdef CONFIG_SCHED_DEBUG
/*
* Debugging: various feature bits
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8512a9fb0229..05d2122533619 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1155,10 +1155,15 @@ struct rq {
*/
unsigned int nr_uninterruptible;
+#ifdef CONFIG_SCHED_PROXY_EXEC
+ struct task_struct __rcu *donor; /* Scheduling context */
+ struct task_struct __rcu *curr; /* Execution context */
+#else
union {
struct task_struct __rcu *donor; /* Scheduler context */
struct task_struct __rcu *curr; /* Execution context */
};
+#endif
struct sched_dl_entity *dl_server;
struct task_struct *idle;
struct task_struct *stop;
@@ -1355,10 +1360,17 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
+#ifdef CONFIG_SCHED_PROXY_EXEC
+static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
+{
+ rcu_assign_pointer(rq->donor, t);
+}
+#else
static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
{
/* Do nothing */
}
+#endif
#ifdef CONFIG_SCHED_CORE
static inline struct cpumask *sched_group_span(struct sched_group *sg);
--
2.49.0.rc0.332.g42c0ae87b1-goog
On Wed, Mar 12, 2025 at 03:11:31PM -0700, John Stultz wrote:
> diff --git a/init/Kconfig b/init/Kconfig
> index d0d021b3fa3b3..b989ddc27444e 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
>
> If in doubt, use the default value.
>
> +config SCHED_PROXY_EXEC
> + bool "Proxy Execution"
> + default n
> + # Avoid some build failures w/ PREEMPT_RT until it can be fixed
> + depends on !PREEMPT_RT

depends on !SPM && !SCHED_CLASS_EXT

for now, right?

> + depends on EXPERT
> + help
> + This option enables proxy execution, a mechanism for mutex-owning
> + tasks to inherit the scheduling context of higher priority waiters.
> +
On Mon, Mar 17, 2025 at 3:33 PM Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Mar 12, 2025 at 03:11:31PM -0700, John Stultz wrote:
>
> > diff --git a/init/Kconfig b/init/Kconfig
> > index d0d021b3fa3b3..b989ddc27444e 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
> >
> > If in doubt, use the default value.
> >
> > +config SCHED_PROXY_EXEC
> > + bool "Proxy Execution"
> > + default n
> > + # Avoid some build failures w/ PREEMPT_RT until it can be fixed
> > + depends on !PREEMPT_RT
> depends on !SPM && !SCHED_CLASS_EXT
>
> for now, right?

Did you mean SMP there? SMP is supported. Even with same rq proxying,
it just blocks the task and lets it sleep if the owner is on another
rq.

!SCHED_CLASS_EXT might be something to add, but mostly because the
SCHED_CLASS_EXT code probably doesn't have a way to understand the
split contexts yet. I'll dig a bit more there.

Thanks so much for the feedback! Really appreciate it!
-john
On Mon, Mar 17, 2025 at 03:44:11PM +0100, John Stultz wrote:
> On Mon, Mar 17, 2025 at 3:33 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > On Wed, Mar 12, 2025 at 03:11:31PM -0700, John Stultz wrote:
> >
> > > diff --git a/init/Kconfig b/init/Kconfig
> > > index d0d021b3fa3b3..b989ddc27444e 100644
> > > --- a/init/Kconfig
> > > +++ b/init/Kconfig
> > > @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
> > >
> > > If in doubt, use the default value.
> > >
> > > +config SCHED_PROXY_EXEC
> > > + bool "Proxy Execution"
> > > + default n
> > > + # Avoid some build failures w/ PREEMPT_RT until it can be fixed
> > > + depends on !PREEMPT_RT
> > depends on !SPM && !SCHED_CLASS_EXT
> >
> > for now, right?
>
> Did you mean SMP there? SMP is supported. Even with same rq proxying,
> it just blocks the task and lets it sleep if the owner is on another
> rq.

Ah, yes, typing hard. And yes, I missed it would work rq local.

Still trying to untangle the proxy_deactivate() failure case :-)
On Wed, 12 Mar 2025 15:11:31 -0700
John Stultz <jstultz@google.com> wrote:
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index fb8752b42ec85..dcc2443078d00 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6262,6 +6262,11 @@
> sa1100ir [NET]
> See drivers/net/irda/sa1100_ir.c.
>
> + sched_proxy_exec= [KNL]
> + Enables or disables "proxy execution" style
> + solution to mutex-based priority inversion.
> + Format: <bool>
To enable, does this require: sched_proxy_exec=true
Could we just allow it to be:
sched_proxy_exec
Also mean true? That is, both of the above would be true, but to
disable it, you would need: sched_proxy_exec=false.
> +
> sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
>
> schedstats= [KNL,X86] Enable or disable scheduled statistics.
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 9c15365a30c08..1462f2c70aefc 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1636,6 +1636,19 @@ struct task_struct {
> */
> };
>
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
> +static inline bool sched_proxy_exec(void)
> +{
> + return static_branch_likely(&__sched_proxy_exec);
> +}
> +#else
> +static inline bool sched_proxy_exec(void)
> +{
> + return false;
> +}
> +#endif
> +
> #define TASK_REPORT_IDLE (TASK_REPORT + 1)
> #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
>
> diff --git a/init/Kconfig b/init/Kconfig
> index d0d021b3fa3b3..b989ddc27444e 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
>
> If in doubt, use the default value.
>
> +config SCHED_PROXY_EXEC
> + bool "Proxy Execution"
> + default n
Nit, you don't need "default n" because "default n" is the default ;-)
> + # Avoid some build failures w/ PREEMPT_RT until it can be fixed
> + depends on !PREEMPT_RT
> + depends on EXPERT
> + help
> + This option enables proxy execution, a mechanism for mutex-owning
> + tasks to inherit the scheduling context of higher priority waiters.
> +
> endmenu
>
> #
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 67189907214d3..3968c3967ec38 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
>
> DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
>
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
> +static int __init setup_proxy_exec(char *str)
> +{
> + bool proxy_enable;
> +
> + if (kstrtobool(str, &proxy_enable)) {
To make it work without adding =true, the above could be:
bool proxy_enable = true;
if (*str && kstrtobool(str, &proxy_enable)) {
> + pr_warn("Unable to parse sched_proxy_exec=\n");
> + return 0;
> + }
> +
> + if (proxy_enable) {
> + pr_info("sched_proxy_exec enabled via boot arg\n");
> + static_branch_enable(&__sched_proxy_exec);
> + } else {
> + pr_info("sched_proxy_exec disabled via boot arg\n");
> + static_branch_disable(&__sched_proxy_exec);
> + }
> + return 1;
> +}
-- Steve
On Thu, Mar 13, 2025 at 3:09 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Wed, 12 Mar 2025 15:11:31 -0700
> John Stultz <jstultz@google.com> wrote:
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index fb8752b42ec85..dcc2443078d00 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -6262,6 +6262,11 @@
> > sa1100ir [NET]
> > See drivers/net/irda/sa1100_ir.c.
> >
> > + sched_proxy_exec= [KNL]
> > + Enables or disables "proxy execution" style
> > + solution to mutex-based priority inversion.
> > + Format: <bool>
>
> To enable, does this require: sched_proxy_exec=true
>
> Could we just allow it to be:
>
> sched_proxy_exec
>
> Also mean true? That is, both of the above would be true, but to
> disable it, you would need: sched_proxy_exec=false.
Currently the flag defaults to true, so I'm not sure if
"sched_proxy_exec" on its own makes as much sense to me.
Though, in the android16-6.12 kernel, I have an additional change that
sets it default to false, which allows "sched_proxy_exec=true" to be
useful. So I'm open to having the argument alone as an enablement
flag (in addition to the explicit setting), but I've personally always
found the mixed conventions there confusing, preferring the explicit
"=true" or "=1" on boot arguments.
> > diff --git a/init/Kconfig b/init/Kconfig
> > index d0d021b3fa3b3..b989ddc27444e 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -875,6 +875,16 @@ config UCLAMP_BUCKETS_COUNT
> >
> > If in doubt, use the default value.
> >
> > +config SCHED_PROXY_EXEC
> > + bool "Proxy Execution"
> > + default n
>
> Nit, you don't need "default n" because "default n" is the default ;-)
Ah, thanks I'll drop that!
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 67189907214d3..3968c3967ec38 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
> >
> > DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
> >
> > +#ifdef CONFIG_SCHED_PROXY_EXEC
> > +DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
> > +static int __init setup_proxy_exec(char *str)
> > +{
> > + bool proxy_enable;
> > +
> > + if (kstrtobool(str, &proxy_enable)) {
>
> To make it work without adding =true, the above could be:
>
> bool proxy_enable = true;
>
> if (*str && kstrtobool(str, &proxy_enable)) {
>
Ok, I'll give this a shot.
Thanks so much for the review and feedback! Really appreciate it!
-john
© 2016 - 2025 Red Hat, Inc.