Add a CONFIG_SCHED_PROXY_EXEC option, along with a boot argument
sched_proxy_exec= that can be used to disable the feature at boot
time if CONFIG_SCHED_PROXY_EXEC was enabled.
Also uses this option to allow the rq->donor to be different from
rq->curr.
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: John Stultz <jstultz@google.com>
---
v7:
* Switch to CONFIG_SCHED_PROXY_EXEC/sched_proxy_exec= as
suggested by Metin Kaya.
* Switch boot arg from =disable/enable to use kstrtobool(),
which supports =yes|no|1|0|true|false|on|off, as also
suggested by Metin Kaya, and print a message when a boot
argument is used.
v8:
* Move CONFIG_SCHED_PROXY_EXEC under Scheduler Features as
Suggested by Metin
* Minor rework reordering with split sched contexts patch
v12:
* Rework for selected -> donor renaming
v14:
* Depend on !PREEMPT_RT to avoid build issues for now
v15:
* Depend on EXPERT while patch series upstreaming is
in progress.
v16:
* Allow "sched_proxy_exec" without "=true" to enable
proxy-execution at boot time, in addition to the
"sched_proxy_exec=true" or "sched_proxy_exec=false" options
as suggested by Steven
* Drop the "default n" in Kconfig as suggested by Steven
* Add !SCHED_CLASS_EXT dependency until I can investigate if
sched_ext can understand split contexts, as suggested by
Peter
v17:
* Expanded the commit message a bit to clarify that the option
allows the split contexts (rq->donor and rq->curr) to be
different tasks, from feedback from Juri Lelli
Cc: Joel Fernandes <joelagnelf@nvidia.com>
Cc: Qais Yousef <qyousef@layalina.io>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Zimuzo Ezeozue <zezeozue@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Metin Kaya <Metin.Kaya@arm.com>
Cc: Xuewen Yan <xuewen.yan94@gmail.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: kuyo chang <kuyo.chang@mediatek.com>
Cc: hupu <hupu.gm@gmail.com>
Cc: kernel-team@android.com
---
.../admin-guide/kernel-parameters.txt | 5 ++++
include/linux/sched.h | 13 +++++++++
init/Kconfig | 12 ++++++++
kernel/sched/core.c | 29 +++++++++++++++++++
kernel/sched/sched.h | 12 ++++++++
5 files changed, 71 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9d..5b89464ca570a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6387,6 +6387,11 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
+ sched_proxy_exec= [KNL]
+ Enables or disables "proxy execution" style
+ solution to mutex-based priority inversion.
+ Format: <bool>
+
sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4f78a64beb52c..ba2a02ead8c7e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1664,6 +1664,19 @@ struct task_struct {
randomized_struct_fields_end
} __attribute__ ((aligned (64)));
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static inline bool sched_proxy_exec(void)
+{
+ return static_branch_likely(&__sched_proxy_exec);
+}
+#else
+static inline bool sched_proxy_exec(void)
+{
+ return false;
+}
+#endif
+
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
diff --git a/init/Kconfig b/init/Kconfig
index 666783eb50abd..5c2b036c85a9d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -875,6 +875,18 @@ config UCLAMP_BUCKETS_COUNT
If in doubt, use the default value.
+config SCHED_PROXY_EXEC
+ bool "Proxy Execution"
+ # Avoid some build failures w/ PREEMPT_RT until it can be fixed
+ depends on !PREEMPT_RT
+ # Need to investigate how to inform sched_ext of split contexts
+ depends on !SCHED_CLASS_EXT
+ # Not particularly useful until we get to multi-rq proxying
+ depends on EXPERT
+ help
+ This option enables proxy execution, a mechanism for mutex-owning
+ tasks to inherit the scheduling context of higher priority waiters.
+
endmenu
#
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ec68fc686bd74..45daa0e5b27d3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static int __init setup_proxy_exec(char *str)
+{
+ bool proxy_enable = true;
+
+ if (*str && kstrtobool(str + 1, &proxy_enable)) {
+ pr_warn("Unable to parse sched_proxy_exec=\n");
+ return 0;
+ }
+
+ if (proxy_enable) {
+ pr_info("sched_proxy_exec enabled via boot arg\n");
+ static_branch_enable(&__sched_proxy_exec);
+ } else {
+ pr_info("sched_proxy_exec disabled via boot arg\n");
+ static_branch_disable(&__sched_proxy_exec);
+ }
+ return 1;
+}
+#else
+static int __init setup_proxy_exec(char *str)
+{
+ pr_warn("CONFIG_SCHED_PROXY_EXEC=n, so it cannot be enabled or disabled at boot time\n");
+ return 0;
+}
+#endif
+__setup("sched_proxy_exec", setup_proxy_exec);
+
/*
* Debugging: various feature bits
*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 475bb5998295e..6b61e0c7f6e78 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1151,10 +1151,15 @@ struct rq {
*/
unsigned int nr_uninterruptible;
+#ifdef CONFIG_SCHED_PROXY_EXEC
+ struct task_struct __rcu *donor; /* Scheduling context */
+ struct task_struct __rcu *curr; /* Execution context */
+#else
union {
struct task_struct __rcu *donor; /* Scheduler context */
struct task_struct __rcu *curr; /* Execution context */
};
+#endif
struct sched_dl_entity *dl_server;
struct task_struct *idle;
struct task_struct *stop;
@@ -1349,10 +1354,17 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
+#ifdef CONFIG_SCHED_PROXY_EXEC
+static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
+{
+ rcu_assign_pointer(rq->donor, t);
+}
+#else
static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
{
/* Do nothing */
}
+#endif
#ifdef CONFIG_SCHED_CORE
static inline struct cpumask *sched_group_span(struct sched_group *sg);
--
2.50.0.727.gbf7dc18ff4-goog
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 25c411fce735dda29de26f58d3fce52d4824380c
Gitweb: https://git.kernel.org/tip/25c411fce735dda29de26f58d3fce52d4824380c
Author: John Stultz <jstultz@google.com>
AuthorDate: Sat, 12 Jul 2025 03:33:42
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 14 Jul 2025 17:16:31 +02:00
sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
Add a CONFIG_SCHED_PROXY_EXEC option, along with a boot argument
sched_proxy_exec= that can be used to disable the feature at boot
time if CONFIG_SCHED_PROXY_EXEC was enabled.
Also uses this option to allow the rq->donor to be different from
rq->curr.
Signed-off-by: John Stultz <jstultz@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://lkml.kernel.org/r/20250712033407.2383110-2-jstultz@google.com
---
Documentation/admin-guide/kernel-parameters.txt | 5 +++-
include/linux/sched.h | 13 +++++++-
init/Kconfig | 12 +++++++-
kernel/sched/core.c | 29 ++++++++++++++++-
kernel/sched/sched.h | 12 +++++++-
5 files changed, 71 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 07e22ba..00b8357 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6387,6 +6387,11 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
+ sched_proxy_exec= [KNL]
+ Enables or disables "proxy execution" style
+ solution to mutex-based priority inversion.
+ Format: <bool>
+
sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 54a9126..f225b6b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1656,6 +1656,19 @@ struct task_struct {
randomized_struct_fields_end
} __attribute__ ((aligned (64)));
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static inline bool sched_proxy_exec(void)
+{
+ return static_branch_likely(&__sched_proxy_exec);
+}
+#else
+static inline bool sched_proxy_exec(void)
+{
+ return false;
+}
+#endif
+
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
diff --git a/init/Kconfig b/init/Kconfig
index 965699c..24dd42d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -878,6 +878,18 @@ config UCLAMP_BUCKETS_COUNT
If in doubt, use the default value.
+config SCHED_PROXY_EXEC
+ bool "Proxy Execution"
+ # Avoid some build failures w/ PREEMPT_RT until it can be fixed
+ depends on !PREEMPT_RT
+ # Need to investigate how to inform sched_ext of split contexts
+ depends on !SCHED_CLASS_EXT
+ # Not particularly useful until we get to multi-rq proxying
+ depends on EXPERT
+ help
+ This option enables proxy execution, a mechanism for mutex-owning
+ tasks to inherit the scheduling context of higher priority waiters.
+
endmenu
#
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e9c8bda..dd9f5c0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#ifdef CONFIG_SCHED_PROXY_EXEC
+DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
+static int __init setup_proxy_exec(char *str)
+{
+ bool proxy_enable = true;
+
+ if (*str && kstrtobool(str + 1, &proxy_enable)) {
+ pr_warn("Unable to parse sched_proxy_exec=\n");
+ return 0;
+ }
+
+ if (proxy_enable) {
+ pr_info("sched_proxy_exec enabled via boot arg\n");
+ static_branch_enable(&__sched_proxy_exec);
+ } else {
+ pr_info("sched_proxy_exec disabled via boot arg\n");
+ static_branch_disable(&__sched_proxy_exec);
+ }
+ return 1;
+}
+#else
+static int __init setup_proxy_exec(char *str)
+{
+ pr_warn("CONFIG_SCHED_PROXY_EXEC=n, so it cannot be enabled or disabled at boot time\n");
+ return 0;
+}
+#endif
+__setup("sched_proxy_exec", setup_proxy_exec);
+
/*
* Debugging: various feature bits
*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ac953fa..e53d0b8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1142,10 +1142,15 @@ struct rq {
*/
unsigned long nr_uninterruptible;
+#ifdef CONFIG_SCHED_PROXY_EXEC
+ struct task_struct __rcu *donor; /* Scheduling context */
+ struct task_struct __rcu *curr; /* Execution context */
+#else
union {
struct task_struct __rcu *donor; /* Scheduler context */
struct task_struct __rcu *curr; /* Execution context */
};
+#endif
struct sched_dl_entity *dl_server;
struct task_struct *idle;
struct task_struct *stop;
@@ -1326,10 +1331,17 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
+#ifdef CONFIG_SCHED_PROXY_EXEC
+static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
+{
+ rcu_assign_pointer(rq->donor, t);
+}
+#else
static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
{
/* Do nothing */
}
+#endif
#ifdef CONFIG_SCHED_CORE
static inline struct cpumask *sched_group_span(struct sched_group *sg);
On Wed, Jul 16, 2025 at 10:19:14AM -0000 tip-bot2 for John Stultz wrote:
> The following commit has been merged into the sched/core branch of tip:
>
Cool! Good work John. "That was easy" ;)
Cheers,
Phil
> Commit-ID: 25c411fce735dda29de26f58d3fce52d4824380c
> Gitweb: https://git.kernel.org/tip/25c411fce735dda29de26f58d3fce52d4824380c
> Author: John Stultz <jstultz@google.com>
> AuthorDate: Sat, 12 Jul 2025 03:33:42
> Committer: Peter Zijlstra <peterz@infradead.org>
> CommitterDate: Mon, 14 Jul 2025 17:16:31 +02:00
>
> sched: Add CONFIG_SCHED_PROXY_EXEC & boot argument to enable/disable
>
> Add a CONFIG_SCHED_PROXY_EXEC option, along with a boot argument
> sched_proxy_exec= that can be used to disable the feature at boot
> time if CONFIG_SCHED_PROXY_EXEC was enabled.
>
> Also uses this option to allow the rq->donor to be different from
> rq->curr.
>
> Signed-off-by: John Stultz <jstultz@google.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
> Link: https://lkml.kernel.org/r/20250712033407.2383110-2-jstultz@google.com
> ---
> Documentation/admin-guide/kernel-parameters.txt | 5 +++-
> include/linux/sched.h | 13 +++++++-
> init/Kconfig | 12 +++++++-
> kernel/sched/core.c | 29 ++++++++++++++++-
> kernel/sched/sched.h | 12 +++++++-
> 5 files changed, 71 insertions(+)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 07e22ba..00b8357 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6387,6 +6387,11 @@
> sa1100ir [NET]
> See drivers/net/irda/sa1100_ir.c.
>
> + sched_proxy_exec= [KNL]
> + Enables or disables "proxy execution" style
> + solution to mutex-based priority inversion.
> + Format: <bool>
> +
> sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
>
> schedstats= [KNL,X86] Enable or disable scheduled statistics.
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 54a9126..f225b6b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1656,6 +1656,19 @@ struct task_struct {
> randomized_struct_fields_end
> } __attribute__ ((aligned (64)));
>
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
> +static inline bool sched_proxy_exec(void)
> +{
> + return static_branch_likely(&__sched_proxy_exec);
> +}
> +#else
> +static inline bool sched_proxy_exec(void)
> +{
> + return false;
> +}
> +#endif
> +
> #define TASK_REPORT_IDLE (TASK_REPORT + 1)
> #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
>
> diff --git a/init/Kconfig b/init/Kconfig
> index 965699c..24dd42d 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -878,6 +878,18 @@ config UCLAMP_BUCKETS_COUNT
>
> If in doubt, use the default value.
>
> +config SCHED_PROXY_EXEC
> + bool "Proxy Execution"
> + # Avoid some build failures w/ PREEMPT_RT until it can be fixed
> + depends on !PREEMPT_RT
> + # Need to investigate how to inform sched_ext of split contexts
> + depends on !SCHED_CLASS_EXT
> + # Not particularly useful until we get to multi-rq proxying
> + depends on EXPERT
> + help
> + This option enables proxy execution, a mechanism for mutex-owning
> + tasks to inherit the scheduling context of higher priority waiters.
> +
> endmenu
>
> #
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index e9c8bda..dd9f5c0 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -119,6 +119,35 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
>
> DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
>
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
> +static int __init setup_proxy_exec(char *str)
> +{
> + bool proxy_enable = true;
> +
> + if (*str && kstrtobool(str + 1, &proxy_enable)) {
> + pr_warn("Unable to parse sched_proxy_exec=\n");
> + return 0;
> + }
> +
> + if (proxy_enable) {
> + pr_info("sched_proxy_exec enabled via boot arg\n");
> + static_branch_enable(&__sched_proxy_exec);
> + } else {
> + pr_info("sched_proxy_exec disabled via boot arg\n");
> + static_branch_disable(&__sched_proxy_exec);
> + }
> + return 1;
> +}
> +#else
> +static int __init setup_proxy_exec(char *str)
> +{
> + pr_warn("CONFIG_SCHED_PROXY_EXEC=n, so it cannot be enabled or disabled at boot time\n");
> + return 0;
> +}
> +#endif
> +__setup("sched_proxy_exec", setup_proxy_exec);
> +
> /*
> * Debugging: various feature bits
> *
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index ac953fa..e53d0b8 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1142,10 +1142,15 @@ struct rq {
> */
> unsigned long nr_uninterruptible;
>
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> + struct task_struct __rcu *donor; /* Scheduling context */
> + struct task_struct __rcu *curr; /* Execution context */
> +#else
> union {
> struct task_struct __rcu *donor; /* Scheduler context */
> struct task_struct __rcu *curr; /* Execution context */
> };
> +#endif
> struct sched_dl_entity *dl_server;
> struct task_struct *idle;
> struct task_struct *stop;
> @@ -1326,10 +1331,17 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
> #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
> #define raw_rq() raw_cpu_ptr(&runqueues)
>
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
> +{
> + rcu_assign_pointer(rq->donor, t);
> +}
> +#else
> static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
> {
> /* Do nothing */
> }
> +#endif
>
> #ifdef CONFIG_SCHED_CORE
> static inline struct cpumask *sched_group_span(struct sched_group *sg);
>
--
© 2016 - 2026 Red Hat, Inc.