include/linux/sched/ext.h | 4 ++--
kernel/sched/ext.c | 25 ++++++++++++++++++++++---
kernel/watchdog.c | 2 +-
3 files changed, 25 insertions(+), 6 deletions(-)
With the buddy lockup detector, smp_processor_id() returns the detecting CPU,
not the locked CPU, making scx_hardlockup()'s printouts confusing. Pass the
locked CPU number from watchdog_hardlockup_check() as a parameter instead.
Also add kerneldoc comments to handle_lockup(), scx_hardlockup(), and
scx_rcu_cpu_stall() documenting their return value semantics.
Suggested-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
include/linux/sched/ext.h | 4 ++--
kernel/sched/ext.c | 25 ++++++++++++++++++++++---
kernel/watchdog.c | 2 +-
3 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 70ee5c28a74d..bcb962d5ee7d 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -230,7 +230,7 @@ struct sched_ext_entity {
void sched_ext_dead(struct task_struct *p);
void print_scx_info(const char *log_lvl, struct task_struct *p);
void scx_softlockup(u32 dur_s);
-bool scx_hardlockup(void);
+bool scx_hardlockup(int cpu);
bool scx_rcu_cpu_stall(void);
#else /* !CONFIG_SCHED_CLASS_EXT */
@@ -238,7 +238,7 @@ bool scx_rcu_cpu_stall(void);
static inline void sched_ext_dead(struct task_struct *p) {}
static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
static inline void scx_softlockup(u32 dur_s) {}
-static inline bool scx_hardlockup(void) { return false; }
+static inline bool scx_hardlockup(int cpu) { return false; }
static inline bool scx_rcu_cpu_stall(void) { return false; }
#endif /* CONFIG_SCHED_CLASS_EXT */
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 8a3b8f64a06b..918573f3f088 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3687,6 +3687,17 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
return false;
}
+/**
+ * handle_lockup - sched_ext common lockup handler
+ * @fmt: format string
+ *
+ * Called on system stall or lockup condition and initiates abort of sched_ext
+ * if enabled, which may resolve the reported lockup.
+ *
+ * Returns %true if sched_ext is enabled and abort was initiated, which may
+ * resolve the lockup. %false if sched_ext is not enabled or abort was already
+ * initiated by someone else.
+ */
static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
{
struct scx_sched *sch;
@@ -3718,6 +3729,10 @@ static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
* that may not be caused by the current BPF scheduler, try kicking out the
* current scheduler in an attempt to recover the system to a good state before
* issuing panics.
+ *
+ * Returns %true if sched_ext is enabled and abort was initiated, which may
+ * resolve the reported RCU stall. %false if sched_ext is not enabled or someone
+ * else already initiated abort.
*/
bool scx_rcu_cpu_stall(void)
{
@@ -3750,14 +3765,18 @@ void scx_softlockup(u32 dur_s)
* numerous affinitized tasks in a single queue and directing all CPUs at it.
* Try kicking out the current scheduler in an attempt to recover the system to
* a good state before taking more drastic actions.
+ *
+ * Returns %true if sched_ext is enabled and abort was initiated, which may
+ * resolve the reported hard lockup. %false if sched_ext is not enabled or
+ * someone else already initiated abort.
*/
-bool scx_hardlockup(void)
+bool scx_hardlockup(int cpu)
{
- if (!handle_lockup("hard lockup - CPU %d", smp_processor_id()))
+ if (!handle_lockup("hard lockup - CPU %d", cpu))
return false;
printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n",
- smp_processor_id());
+ cpu);
return true;
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 8dfac4a8f587..873020a2a581 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -203,7 +203,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
* only once when sched_ext is enabled and will immediately
* abort the BPF scheduler and print out a warning message.
*/
- if (scx_hardlockup())
+ if (scx_hardlockup(cpu))
return;
/* Only print hardlockups once. */
Applied to sched_ext/for-6.19. Thanks. -- tejun
Applied to sched_ext/for-6.19. Thanks. -- tejun
Hi,

On Thu, Nov 13, 2025 at 5:33 PM Tejun Heo <tj@kernel.org> wrote:
>
> With the buddy lockup detector, smp_processor_id() returns the detecting CPU,
> not the locked CPU, making scx_hardlockup()'s printouts confusing. Pass the
> locked CPU number from watchdog_hardlockup_check() as a parameter instead.
>
> Also add kerneldoc comments to handle_lockup(), scx_hardlockup(), and
> scx_rcu_cpu_stall() documenting their return value semantics.
>
> Suggested-by: Doug Anderson <dianders@chromium.org>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> ---
> include/linux/sched/ext.h | 4 ++--
> kernel/sched/ext.c | 25 ++++++++++++++++++++++---
> kernel/watchdog.c | 2 +-
> 3 files changed, 25 insertions(+), 6 deletions(-)

Reviewed-by: Douglas Anderson <dianders@chromium.org>
On Thu, Nov 13, 2025 at 03:33:41PM -1000, Tejun Heo wrote:
> With the buddy lockup detector, smp_processor_id() returns the detecting CPU,
> not the locked CPU, making scx_hardlockup()'s printouts confusing. Pass the
> locked CPU number from watchdog_hardlockup_check() as a parameter instead.
>
> Also add kerneldoc comments to handle_lockup(), scx_hardlockup(), and
> scx_rcu_cpu_stall() documenting their return value semantics.
>
> Suggested-by: Doug Anderson <dianders@chromium.org>
> Signed-off-by: Tejun Heo <tj@kernel.org>
Makes sense.
Acked-by: Andrea Righi <arighi@nvidia.com>
Thanks,
-Andrea
> ---
> include/linux/sched/ext.h | 4 ++--
> kernel/sched/ext.c | 25 ++++++++++++++++++++++---
> kernel/watchdog.c | 2 +-
> 3 files changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
> index 70ee5c28a74d..bcb962d5ee7d 100644
> --- a/include/linux/sched/ext.h
> +++ b/include/linux/sched/ext.h
> @@ -230,7 +230,7 @@ struct sched_ext_entity {
> void sched_ext_dead(struct task_struct *p);
> void print_scx_info(const char *log_lvl, struct task_struct *p);
> void scx_softlockup(u32 dur_s);
> -bool scx_hardlockup(void);
> +bool scx_hardlockup(int cpu);
> bool scx_rcu_cpu_stall(void);
>
> #else /* !CONFIG_SCHED_CLASS_EXT */
> @@ -238,7 +238,7 @@ bool scx_rcu_cpu_stall(void);
> static inline void sched_ext_dead(struct task_struct *p) {}
> static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
> static inline void scx_softlockup(u32 dur_s) {}
> -static inline bool scx_hardlockup(void) { return false; }
> +static inline bool scx_hardlockup(int cpu) { return false; }
> static inline bool scx_rcu_cpu_stall(void) { return false; }
>
> #endif /* CONFIG_SCHED_CLASS_EXT */
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 8a3b8f64a06b..918573f3f088 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -3687,6 +3687,17 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
> return false;
> }
>
> +/**
> + * handle_lockup - sched_ext common lockup handler
> + * @fmt: format string
> + *
> + * Called on system stall or lockup condition and initiates abort of sched_ext
> + * if enabled, which may resolve the reported lockup.
> + *
> + * Returns %true if sched_ext is enabled and abort was initiated, which may
> + * resolve the lockup. %false if sched_ext is not enabled or abort was already
> + * initiated by someone else.
> + */
> static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
> {
> struct scx_sched *sch;
> @@ -3718,6 +3729,10 @@ static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
> * that may not be caused by the current BPF scheduler, try kicking out the
> * current scheduler in an attempt to recover the system to a good state before
> * issuing panics.
> + *
> + * Returns %true if sched_ext is enabled and abort was initiated, which may
> + * resolve the reported RCU stall. %false if sched_ext is not enabled or someone
> + * else already initiated abort.
> */
> bool scx_rcu_cpu_stall(void)
> {
> @@ -3750,14 +3765,18 @@ void scx_softlockup(u32 dur_s)
> * numerous affinitized tasks in a single queue and directing all CPUs at it.
> * Try kicking out the current scheduler in an attempt to recover the system to
> * a good state before taking more drastic actions.
> + *
> + * Returns %true if sched_ext is enabled and abort was initiated, which may
> + * resolve the reported hard lockup. %false if sched_ext is not enabled or
> + * someone else already initiated abort.
> */
> -bool scx_hardlockup(void)
> +bool scx_hardlockup(int cpu)
> {
> - if (!handle_lockup("hard lockup - CPU %d", smp_processor_id()))
> + if (!handle_lockup("hard lockup - CPU %d", cpu))
> return false;
>
> printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n",
> - smp_processor_id());
> + cpu);
> return true;
> }
>
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index 8dfac4a8f587..873020a2a581 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -203,7 +203,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
> * only once when sched_ext is enabled and will immediately
> * abort the BPF scheduler and print out a warning message.
> */
> - if (scx_hardlockup())
> + if (scx_hardlockup(cpu))
> return;
>
> /* Only print hardlockups once. */
On Thu, Nov 13, 2025 at 8:34 PM Tejun Heo <tj@kernel.org> wrote:
>
> With the buddy lockup detector, smp_processor_id() returns the detecting CPU,
> not the locked CPU, making scx_hardlockup()'s printouts confusing. Pass the
> locked CPU number from watchdog_hardlockup_check() as a parameter instead.
>
> Also add kerneldoc comments to handle_lockup(), scx_hardlockup(), and
> scx_rcu_cpu_stall() documenting their return value semantics.
>
> Suggested-by: Doug Anderson <dianders@chromium.org>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> ---
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
> include/linux/sched/ext.h | 4 ++--
> kernel/sched/ext.c | 25 ++++++++++++++++++++++---
> kernel/watchdog.c | 2 +-
> 3 files changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
> index 70ee5c28a74d..bcb962d5ee7d 100644
> --- a/include/linux/sched/ext.h
> +++ b/include/linux/sched/ext.h
> @@ -230,7 +230,7 @@ struct sched_ext_entity {
> void sched_ext_dead(struct task_struct *p);
> void print_scx_info(const char *log_lvl, struct task_struct *p);
> void scx_softlockup(u32 dur_s);
> -bool scx_hardlockup(void);
> +bool scx_hardlockup(int cpu);
> bool scx_rcu_cpu_stall(void);
>
> #else /* !CONFIG_SCHED_CLASS_EXT */
> @@ -238,7 +238,7 @@ bool scx_rcu_cpu_stall(void);
> static inline void sched_ext_dead(struct task_struct *p) {}
> static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
> static inline void scx_softlockup(u32 dur_s) {}
> -static inline bool scx_hardlockup(void) { return false; }
> +static inline bool scx_hardlockup(int cpu) { return false; }
> static inline bool scx_rcu_cpu_stall(void) { return false; }
>
> #endif /* CONFIG_SCHED_CLASS_EXT */
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 8a3b8f64a06b..918573f3f088 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -3687,6 +3687,17 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
> return false;
> }
>
> +/**
> + * handle_lockup - sched_ext common lockup handler
> + * @fmt: format string
> + *
> + * Called on system stall or lockup condition and initiates abort of sched_ext
> + * if enabled, which may resolve the reported lockup.
> + *
> + * Returns %true if sched_ext is enabled and abort was initiated, which may
> + * resolve the lockup. %false if sched_ext is not enabled or abort was already
> + * initiated by someone else.
> + */
> static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
> {
> struct scx_sched *sch;
> @@ -3718,6 +3729,10 @@ static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
> * that may not be caused by the current BPF scheduler, try kicking out the
> * current scheduler in an attempt to recover the system to a good state before
> * issuing panics.
> + *
> + * Returns %true if sched_ext is enabled and abort was initiated, which may
> + * resolve the reported RCU stall. %false if sched_ext is not enabled or someone
> + * else already initiated abort.
> */
> bool scx_rcu_cpu_stall(void)
> {
> @@ -3750,14 +3765,18 @@ void scx_softlockup(u32 dur_s)
> * numerous affinitized tasks in a single queue and directing all CPUs at it.
> * Try kicking out the current scheduler in an attempt to recover the system to
> * a good state before taking more drastic actions.
> + *
> + * Returns %true if sched_ext is enabled and abort was initiated, which may
> + * resolve the reported hard lockup. %false if sched_ext is not enabled or
> + * someone else already initiated abort.
> */
> -bool scx_hardlockup(void)
> +bool scx_hardlockup(int cpu)
> {
> - if (!handle_lockup("hard lockup - CPU %d", smp_processor_id()))
> + if (!handle_lockup("hard lockup - CPU %d", cpu))
> return false;
>
> printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n",
> - smp_processor_id());
> + cpu);
> return true;
> }
>
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index 8dfac4a8f587..873020a2a581 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -203,7 +203,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
> * only once when sched_ext is enabled and will immediately
> * abort the BPF scheduler and print out a warning message.
> */
> - if (scx_hardlockup())
> + if (scx_hardlockup(cpu))
> return;
>
> /* Only print hardlockups once. */
>
© 2016 - 2026 Red Hat, Inc.