The individual architectures often add the preemption model to the beginning
of the backtrace. This is the case on X86 or ARM64 for the "die" case
but not for regular warnings. With the addition of PREEMPT_DYNAMIC for
PREEMPT_RT we end up with CONFIG_PREEMPT and CONFIG_PREEMPT_RT set
simultaneously. That means that everyone who tried to add that piece of
information gets it wrong for PREEMPT_RT because PREEMPT is checked
first.
Provide a generic function which returns the current scheduling model
considering LAZY preempt and the current state of PREEMPT_DYNAMIC.
The resulting strings are:
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Model ┃ -RT -DYN ┃ +RT -DYN ┃ -RT +DYN ┃ +RT +DYN ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│NONE │ NONE │ n/a │ PREEMPT(none) │ n/a │
├───────────┼──────────────┼───────────────────┼────────────────────┼───────────────────┤
│VOLUNTARY │ VOLUNTARY │ n/a │ PREEMPT(voluntary) │ n/a │
├───────────┼──────────────┼───────────────────┼────────────────────┼───────────────────┤
│FULL │ PREEMPT │ PREEMPT_RT │ PREEMPT(full) │ PREEMPT_{RT,(full)} │
├───────────┼──────────────┼───────────────────┼────────────────────┼───────────────────┤
│LAZY │ PREEMPT_LAZY │ PREEMPT_{RT,LAZY} │ PREEMPT(lazy) │ PREEMPT_{RT,(lazy)} │
└───────────┴──────────────┴───────────────────┴────────────────────┴───────────────────┘
[ The dynamic building of the string can lead to an empty string if the
function is invoked simultaneously on two CPUs. ]
Co-developed-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Co-developed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/preempt.h | 2 ++
kernel/sched/core.c | 47 +++++++++++++++++++++++++++++++++++++++++
kernel/sched/debug.c | 10 +++++----
kernel/sched/sched.h | 1 +
4 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index ca86235ac15c0..3e9808f2b5491 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -515,6 +515,8 @@ static inline bool preempt_model_rt(void)
return IS_ENABLED(CONFIG_PREEMPT_RT);
}
+extern const char *preempt_model_str(void);
+
/*
* Does the preemption model allow non-cooperative preemption?
*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67189907214d3..f6fba7da40c78 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7647,10 +7647,57 @@ PREEMPT_MODEL_ACCESSOR(lazy);
#else /* !CONFIG_PREEMPT_DYNAMIC: */
+#define preempt_dynamic_mode -1
+
static inline void preempt_dynamic_init(void) { }
#endif /* CONFIG_PREEMPT_DYNAMIC */
+const char *preempt_modes[] = {	/* looked up by preempt_dynamic_mode in preempt_model_str() */
+ "none", "voluntary", "full", "lazy", NULL,	/* trailing NULL lets other files count the entries */
+};
+
+/*
+ * preempt_model_str - name the preemption model this kernel is running with
+ *
+ * Without CONFIG_PREEMPT_BUILD a string literal is returned: "VOLUNTARY"
+ * for CONFIG_PREEMPT_VOLUNTARY_BUILD, "NONE" otherwise.
+ *
+ * With CONFIG_PREEMPT_BUILD the name is assembled from "PREEMPT", an
+ * optional RT marker and either the current PREEMPT_DYNAMIC mode in
+ * parentheses or "LAZY". The RT marker and the suffix are wrapped in
+ * braces when both are present.
+ *
+ * The name is built in a static buffer that is rewritten on every call
+ * without any locking, so simultaneous callers can observe an incomplete
+ * or empty string.
+ */
+const char *preempt_model_str(void)
+{
+	bool brace = IS_ENABLED(CONFIG_PREEMPT_RT) &&
+		(IS_ENABLED(CONFIG_PREEMPT_DYNAMIC) ||
+		 IS_ENABLED(CONFIG_PREEMPT_LAZY));
+	static char buf[128];
+
+	if (IS_ENABLED(CONFIG_PREEMPT_BUILD)) {
+		struct seq_buf s;
+
+		seq_buf_init(&s, buf, sizeof(buf));
+		seq_buf_puts(&s, "PREEMPT");
+
+		if (IS_ENABLED(CONFIG_PREEMPT_RT))
+			seq_buf_printf(&s, "%sRT%s",
+				       brace ? "_{" : "_",
+				       brace ? "," : "");
+
+		if (IS_ENABLED(CONFIG_PREEMPT_DYNAMIC)) {
+			/*
+			 * Index 0 is the "none" entry of preempt_modes[] and
+			 * must not be reported as "undef"; -1 is what
+			 * preempt_dynamic_mode expands to when
+			 * PREEMPT_DYNAMIC is disabled.
+			 */
+			seq_buf_printf(&s, "(%s)%s",
+				       preempt_dynamic_mode >= 0 ?
+				       preempt_modes[preempt_dynamic_mode] : "undef",
+				       brace ? "}" : "");
+			return seq_buf_str(&s);
+		}
+
+		if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
+			seq_buf_printf(&s, "LAZY%s",
+				       brace ? "}" : "");
+			return seq_buf_str(&s);
+		}
+
+		return seq_buf_str(&s);
+	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY_BUILD))
+		return "VOLUNTARY";
+
+	return "NONE";
+}
+
int io_schedule_prepare(void)
{
int old_iowait = current->in_iowait;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index ef047add7f9e6..39be73969d284 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -244,11 +244,13 @@ static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
static int sched_dynamic_show(struct seq_file *m, void *v)
{
- static const char * preempt_modes[] = {
- "none", "voluntary", "full", "lazy",
- };
- int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
int i = IS_ENABLED(CONFIG_PREEMPT_RT) * 2;
+ int j;
+
+ /* Count entries in NULL terminated preempt_modes */
+ for (j = 0; preempt_modes[j]; j++)
+ ;
+ j -= !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
for (; i < j; i++) {
if (preempt_dynamic_mode == i)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8512a9fb0229..9c3252fbaee08 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3619,6 +3619,7 @@ extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
+extern const char *preempt_modes[];
#ifdef CONFIG_SCHED_MM_CID
--
2.47.2
* Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> +const char *preempt_modes[] = {
> + "none", "voluntary", "full", "lazy", NULL,
> +};
> + /* Count entries in NULL terminated preempt_modes */
> + for (j = 0; preempt_modes[j]; j++)
> + ;
I'm pretty sure the build-time ARRAY_SIZE() primitive is superior here. ;-)
Thanks,
Ingo
On 2025-03-16 12:15:47 [+0100], Ingo Molnar wrote:
>
> * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
>
> > +const char *preempt_modes[] = {
> > + "none", "voluntary", "full", "lazy", NULL,
> > +};
>
> > + /* Count entries in NULL terminated preempt_modes */
> > + for (j = 0; preempt_modes[j]; j++)
> > + ;
>
> I'm pretty sure the build-time ARRAY_SIZE() primitive is superior here. ;-)
It would be but it is not an option.
That array is defined in core.c where it is "always" required while
debug.c needs it optionally. core.c is its own compilation unit while
debug.c is included by build_utility.c. So I don't see how this can work
unless we shift things:
| CC kernel/sched/build_utility.o
| In file included from include/linux/kernel.h:16,
| from include/linux/cpumask.h:11,
| from include/linux/smp.h:13,
| from include/linux/sched/clock.h:5,
| from kernel/sched/build_utility.c:12:
| kernel/sched/debug.c: In function ‘sched_dynamic_show’:
| include/linux/array_size.h:11:32: error: invalid application of ‘sizeof’ to incomplete type ‘const char *[]’
| 11 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
| | ^
| kernel/sched/debug.c:250:13: note: in expansion of macro ‘ARRAY_SIZE’
| 250 | j = ARRAY_SIZE(preempt_modes);
| | ^~~~~~~~~~
> Thanks,
>
> Ingo
Sebastian
* Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> On 2025-03-16 12:15:47 [+0100], Ingo Molnar wrote:
> >
> > * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> >
> > > +const char *preempt_modes[] = {
> > > + "none", "voluntary", "full", "lazy", NULL,
> > > +};
> >
> > > + /* Count entries in NULL terminated preempt_modes */
> > > + for (j = 0; preempt_modes[j]; j++)
> > > + ;
> >
> > I'm pretty sure the build-time ARRAY_SIZE() primitive is superior here. ;-)
>
> It would be but it is not an option.
> That array is defined in core.c where it is "always" required while
> debug.c needs it optionally. core.c is its one compile unit while
> debug.c is included by build_utility.c. So I don't see how this can work
> unless we shift things:
Why not have it all in debug.c?
Thanks,
Ingo
On 2025-03-17 10:12:31 [+0100], Ingo Molnar wrote:
>
> * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
>
> > On 2025-03-16 12:15:47 [+0100], Ingo Molnar wrote:
> > >
> > > * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > >
> > > > +const char *preempt_modes[] = {
> > > > + "none", "voluntary", "full", "lazy", NULL,
> > > > +};
> > >
> > > > + /* Count entries in NULL terminated preempt_modes */
> > > > + for (j = 0; preempt_modes[j]; j++)
> > > > + ;
> > >
> > > I'm pretty sure the build-time ARRAY_SIZE() primitive is superior here. ;-)
> >
> > It would be but it is not an option.
> > That array is defined in core.c where it is "always" required while
> > debug.c needs it optionally. core.c is its one compile unit while
> > debug.c is included by build_utility.c. So I don't see how this can work
> > unless we shift things:
>
> Why not have it all in debug.c?
The debug.c include is behind CONFIG_SCHED_DEBUG. This needs to be moved
into debug.c itself so that code can be added regardless of
CONFIG_SCHED_DEBUG. It is not only sched-debug after that.
Then we have `preempt_dynamic_mode' logic which is in core.c but it is
exported and used in debug.c
So if all this is not a concern then I can move it.
> Thanks,
>
> Ingo
Sebastian
* Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> On 2025-03-17 10:12:31 [+0100], Ingo Molnar wrote:
> >
> > * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> >
> > > On 2025-03-16 12:15:47 [+0100], Ingo Molnar wrote:
> > > >
> > > > * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > > >
> > > > > +const char *preempt_modes[] = {
> > > > > + "none", "voluntary", "full", "lazy", NULL,
> > > > > +};
> > > >
> > > > > + /* Count entries in NULL terminated preempt_modes */
> > > > > + for (j = 0; preempt_modes[j]; j++)
> > > > > + ;
> > > >
> > > > I'm pretty sure the build-time ARRAY_SIZE() primitive is superior here. ;-)
> > >
> > > It would be but it is not an option.
> > > That array is defined in core.c where it is "always" required while
> > > debug.c needs it optionally. core.c is its one compile unit while
> > > debug.c is included by build_utility.c. So I don't see how this can work
> > > unless we shift things:
> >
> > Why not have it all in debug.c?
>
> The debug.c include is behind CONFIG_SCHED_DEBUG. This needs to be moved
> into debug.c itself so that code can be added regardless of
> CONFIG_SCHED_DEBUG. It is not only sched-debug after that.
Yeah, that's a valid concern.
The thing is, CONFIG_SCHED_DEBUG is mostly meaningless these days - all
major distributions enable it because the statistics are useful for
system administration, tooling and general software development.
So we should enable it permanently and remove the #ifdeffery. I'll send
out a series to do so soon.
Thanks,
Ingo
* Ingo Molnar <mingo@kernel.org> wrote:
>
> * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
>
> > On 2025-03-17 10:12:31 [+0100], Ingo Molnar wrote:
> > >
> > > * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > >
> > > > On 2025-03-16 12:15:47 [+0100], Ingo Molnar wrote:
> > > > >
> > > > > * Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > > > >
> > > > > > +const char *preempt_modes[] = {
> > > > > > + "none", "voluntary", "full", "lazy", NULL,
> > > > > > +};
> > > > >
> > > > > > + /* Count entries in NULL terminated preempt_modes */
> > > > > > + for (j = 0; preempt_modes[j]; j++)
> > > > > > + ;
> > > > >
> > > > > I'm pretty sure the build-time ARRAY_SIZE() primitive is superior here. ;-)
> > > >
> > > > It would be but it is not an option.
> > > > That array is defined in core.c where it is "always" required while
> > > > debug.c needs it optionally. core.c is its one compile unit while
> > > > debug.c is included by build_utility.c. So I don't see how this can work
> > > > unless we shift things:
> > >
> > > Why not have it all in debug.c?
> >
> > The debug.c include is behind CONFIG_SCHED_DEBUG. This needs to be moved
> > into debug.c itself so that code can be added regardless of
> > CONFIG_SCHED_DEBUG. It is not only sched-debug after that.
>
> Yeah, that's a valid concern.
>
> The thing is, CONFIG_SCHED_DEBUG is mostly meaningless these days - all
> major distributions enable it because of the statistics are useful for
> system administration, tooling and general software development.
>
> So we should enable it permanently and remove the #ifdeffery. I'll send
> out a series to do so soon.
Here's the series:
https://lore.kernel.org/r/20250317104257.3496611-1-mingo@kernel.org
Ingo Molnar (5):
sched/debug: Change SCHED_WARN_ON() to WARN_ON_ONCE()
sched/debug: Make 'const_debug' tunables unconditional __read_mostly
sched/debug: Make CONFIG_SCHED_DEBUG functionality unconditional
sched/debug, Documentation: Remove (most) CONFIG_SCHED_DEBUG references from documentation
sched/debug: Remove CONFIG_SCHED_DEBUG
Forgot to Cc: you - will do so in future versions.
Thanks,
Ingo
On 2025-03-17 11:18:37 [+0100], Ingo Molnar wrote: > Yeah, that's a valid concern. > > The thing is, CONFIG_SCHED_DEBUG is mostly meaningless these days - all > major distributions enable it because of the statistics are useful for > system administration, tooling and general software development. > > So we should enable it permanently and remove the #ifdeffery. I'll send > out a series to do so soon. If that is the way to go, then please Cc me and I will rebase on top of it. > Thanks, > > Ingo Sebastian
* Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote: > On 2025-03-17 11:18:37 [+0100], Ingo Molnar wrote: > > Yeah, that's a valid concern. > > > > The thing is, CONFIG_SCHED_DEBUG is mostly meaningless these days - all > > major distributions enable it because of the statistics are useful for > > system administration, tooling and general software development. > > > > So we should enable it permanently and remove the #ifdeffery. I'll send > > out a series to do so soon. > > If that is the way to go, the please Cc me and I rebase on top of it. Let's do it the other way around - your series is mature enough I think, and the merge window is close. Thanks, Ingo
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 8bdc5daaa01e3054647d394d354762210ad88f17
Gitweb: https://git.kernel.org/tip/8bdc5daaa01e3054647d394d354762210ad88f17
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Fri, 14 Mar 2025 17:08:02 +01:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 17 Mar 2025 11:23:38 +01:00
sched: Add a generic function to return the preemption string
The individual architectures often add the preemption model to the beginning
of the backtrace. This is the case on X86 or ARM64 for the "die" case
but not for regular warnings. With the addition of PREEMPT_DYNAMIC for
PREEMPT_RT we end up with CONFIG_PREEMPT and CONFIG_PREEMPT_RT set
simultaneously. That means that everyone who tried to add that piece of
information gets it wrong for PREEMPT_RT because PREEMPT is checked
first.
Provide a generic function which returns the current scheduling model
considering LAZY preempt and the current state of PREEMPT_DYNAMIC.
The resulting strings are:
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Model ┃ -RT -DYN ┃ +RT -DYN ┃ -RT +DYN ┃ +RT +DYN ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│NONE │ NONE │ n/a │ PREEMPT(none) │ n/a │
├───────────┼──────────────┼───────────────────┼────────────────────┼───────────────────┤
│VOLUNTARY │ VOLUNTARY │ n/a │ PREEMPT(voluntary) │ n/a │
├───────────┼──────────────┼───────────────────┼────────────────────┼───────────────────┤
│FULL │ PREEMPT │ PREEMPT_RT │ PREEMPT(full) │ PREEMPT_{RT,(full)} │
├───────────┼──────────────┼───────────────────┼────────────────────┼───────────────────┤
│LAZY │ PREEMPT_LAZY │ PREEMPT_{RT,LAZY} │ PREEMPT(lazy) │ PREEMPT_{RT,(lazy)} │
└───────────┴──────────────┴───────────────────┴────────────────────┴───────────────────┘
[ The dynamic building of the string can lead to an empty string if the
function is invoked simultaneously on two CPUs. ]
Co-developed-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Signed-off-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Co-developed-by: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Signed-off-by: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Link: https://lore.kernel.org/r/20250314160810.2373416-2-bigeasy@linutronix.de
---
include/linux/preempt.h | 2 ++-
kernel/sched/core.c | 47 ++++++++++++++++++++++++++++++++++++++++-
kernel/sched/debug.c | 10 +++++----
kernel/sched/sched.h | 1 +-
4 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index ca86235..3e9808f 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -515,6 +515,8 @@ static inline bool preempt_model_rt(void)
return IS_ENABLED(CONFIG_PREEMPT_RT);
}
+extern const char *preempt_model_str(void);
+
/*
* Does the preemption model allow non-cooperative preemption?
*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 03d7b63..c734724 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7646,10 +7646,57 @@ PREEMPT_MODEL_ACCESSOR(lazy);
#else /* !CONFIG_PREEMPT_DYNAMIC: */
+#define preempt_dynamic_mode -1
+
static inline void preempt_dynamic_init(void) { }
#endif /* CONFIG_PREEMPT_DYNAMIC */
+const char *preempt_modes[] = {	/* looked up by preempt_dynamic_mode in preempt_model_str() */
+ "none", "voluntary", "full", "lazy", NULL,	/* trailing NULL lets other files count the entries */
+};
+
+/*
+ * preempt_model_str - name the preemption model this kernel is running with
+ *
+ * Without CONFIG_PREEMPT_BUILD a string literal is returned: "VOLUNTARY"
+ * for CONFIG_PREEMPT_VOLUNTARY_BUILD, "NONE" otherwise.
+ *
+ * With CONFIG_PREEMPT_BUILD the name is assembled from "PREEMPT", an
+ * optional RT marker and either the current PREEMPT_DYNAMIC mode in
+ * parentheses or "LAZY". The RT marker and the suffix are wrapped in
+ * braces when both are present.
+ *
+ * The name is built in a static buffer that is rewritten on every call
+ * without any locking, so simultaneous callers can observe an incomplete
+ * or empty string.
+ */
+const char *preempt_model_str(void)
+{
+	bool brace = IS_ENABLED(CONFIG_PREEMPT_RT) &&
+		(IS_ENABLED(CONFIG_PREEMPT_DYNAMIC) ||
+		 IS_ENABLED(CONFIG_PREEMPT_LAZY));
+	static char buf[128];
+
+	if (IS_ENABLED(CONFIG_PREEMPT_BUILD)) {
+		struct seq_buf s;
+
+		seq_buf_init(&s, buf, sizeof(buf));
+		seq_buf_puts(&s, "PREEMPT");
+
+		if (IS_ENABLED(CONFIG_PREEMPT_RT))
+			seq_buf_printf(&s, "%sRT%s",
+				       brace ? "_{" : "_",
+				       brace ? "," : "");
+
+		if (IS_ENABLED(CONFIG_PREEMPT_DYNAMIC)) {
+			/*
+			 * Index 0 is the "none" entry of preempt_modes[] and
+			 * must not be reported as "undef"; -1 is what
+			 * preempt_dynamic_mode expands to when
+			 * PREEMPT_DYNAMIC is disabled.
+			 */
+			seq_buf_printf(&s, "(%s)%s",
+				       preempt_dynamic_mode >= 0 ?
+				       preempt_modes[preempt_dynamic_mode] : "undef",
+				       brace ? "}" : "");
+			return seq_buf_str(&s);
+		}
+
+		if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
+			seq_buf_printf(&s, "LAZY%s",
+				       brace ? "}" : "");
+			return seq_buf_str(&s);
+		}
+
+		return seq_buf_str(&s);
+	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY_BUILD))
+		return "VOLUNTARY";
+
+	return "NONE";
+}
+
int io_schedule_prepare(void)
{
int old_iowait = current->in_iowait;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index ef047ad..39be739 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -244,11 +244,13 @@ static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
static int sched_dynamic_show(struct seq_file *m, void *v)
{
- static const char * preempt_modes[] = {
- "none", "voluntary", "full", "lazy",
- };
- int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
int i = IS_ENABLED(CONFIG_PREEMPT_RT) * 2;
+ int j;
+
+ /* Count entries in NULL terminated preempt_modes */
+ for (j = 0; preempt_modes[j]; j++)
+ ;
+ j -= !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
for (; i < j; i++) {
if (preempt_dynamic_mode == i)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0212a0c..e8915ad 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3633,6 +3633,7 @@ extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
+extern const char *preempt_modes[];
#ifdef CONFIG_SCHED_MM_CID
© 2016 - 2025 Red Hat, Inc.