.../admin-guide/kernel-parameters.txt | 7 +++ include/linux/nmi.h | 6 +++ kernel/watchdog.c | 46 ++++++++++++++++++- kernel/watchdog_buddy.c | 7 +-- kernel/watchdog_perf.c | 10 ++-- lib/Kconfig.debug | 37 +++++++-------- 6 files changed, 85 insertions(+), 28 deletions(-)
Currently, the hard lockup detector is selected at compile time via
Kconfig, which requires a kernel rebuild to switch implementations.
This is inflexible, especially on systems where a perf event may not
be available or may be needed for other tasks.
This commit refactors the hard lockup detector to replace a rigid
compile-time choice with a flexible build-time and boot-time solution.
The patch supports building the kernel with either detector
independently, or with both. When both are built, a new boot parameter
`hardlockup_detector=perf|buddy` allows the selection at boot time; if
it is not given, the perf detector is used.
This is a more robust and user-friendly design.
This patch is a follow-up to the discussion on the kernel mailing list
regarding the preference and future of the hard lockup detectors. It
implements a flexible solution that addresses the community's need to
select an appropriate detector at boot time.
The core changes are:
- The `perf` and `buddy` watchdog implementations are separated into
distinct functions (e.g., `watchdog_perf_hardlockup_enable`).
- Global function pointers are introduced (`watchdog_hardlockup_enable_ptr`)
to serve as a single API for the entire feature.
- A new `hardlockup_detector=` boot parameter is added to allow the
user to select the desired detector at boot time.
- The Kconfig options are simplified by removing the complex
`HARDLOCKUP_DETECTOR_PREFER_BUDDY` and allowing both detectors to be
built without mutual exclusion.
- The weak stubs are updated to call the new function pointers,
centralizing the watchdog logic.
Link: https://lore.kernel.org/all/20250915035355.10846-1-cuiyunhui@bytedance.com/
Link: https://lore.kernel.org/all/CAD=FV=WWUiCi6bZCs_gseFpDDWNkuJMoL6XCftEo6W7q6jRCkg@mail.gmail.com/
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
---
.../admin-guide/kernel-parameters.txt | 7 +++
include/linux/nmi.h | 6 +++
kernel/watchdog.c | 46 ++++++++++++++++++-
kernel/watchdog_buddy.c | 7 +--
kernel/watchdog_perf.c | 10 ++--
lib/Kconfig.debug | 37 +++++++--------
6 files changed, 85 insertions(+), 28 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5a7a83c411e9..0af214ee566c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1828,6 +1828,13 @@
backtraces on all cpus.
Format: 0 | 1
+	hardlockup_detector=
+			[KNL] Selects the hard lockup detector to use at
+			boot time. Only available when both the perf and the
+			buddy detectors are built in; the default is perf.
+			Format: { perf | buddy }
+			perf  - Use the perf-based detector.
+			buddy - Use the buddy-based detector.
+
hash_pointers=
[KNL,EARLY]
By default, when pointers are printed to the console
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index cf3c6ab408aa..9298980ce572 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -100,6 +100,9 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
#endif
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
+void watchdog_perf_hardlockup_enable(unsigned int cpu);
+void watchdog_perf_hardlockup_disable(unsigned int cpu);
+extern int watchdog_perf_hardlockup_probe(void);
extern void hardlockup_detector_perf_stop(void);
extern void hardlockup_detector_perf_restart(void);
extern void hardlockup_config_perf_event(const char *str);
@@ -120,6 +123,9 @@ void watchdog_hardlockup_disable(unsigned int cpu);
void lockup_detector_reconfigure(void);
#ifdef CONFIG_HARDLOCKUP_DETECTOR_BUDDY
+void watchdog_buddy_hardlockup_enable(unsigned int cpu);
+void watchdog_buddy_hardlockup_disable(unsigned int cpu);
+int watchdog_buddy_hardlockup_probe(void);
void watchdog_buddy_check_hardlockup(int hrtimer_interrupts);
#else
static inline void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) {}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 80b56c002c7f..85451d24a77d 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -55,6 +55,37 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
+/*
+ * Hooks behind the generic (weak) watchdog_hardlockup_{enable,disable,probe}()
+ * entry points further down in this file.
+ *
+ * They are defined for every configuration because the weak entry points
+ * reference them unconditionally; when neither the perf nor the buddy
+ * detector is built (e.g. an arch-specific detector is used) they stay NULL
+ * and the weak entry points fall back to their old no-op behaviour.
+ *
+ * perf is the default when both detectors are built. The hooks are only
+ * written by __init code, hence __ro_after_init. The probe hook points to
+ * __init code, so it must live in __initdata to avoid a section mismatch.
+ */
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
+static void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) __ro_after_init =
+	watchdog_perf_hardlockup_enable;
+static void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) __ro_after_init =
+	watchdog_perf_hardlockup_disable;
+static int (*watchdog_hardlockup_probe_ptr)(void) __initdata =
+	watchdog_perf_hardlockup_probe;
+#elif defined(CONFIG_HARDLOCKUP_DETECTOR_BUDDY)
+static void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) __ro_after_init =
+	watchdog_buddy_hardlockup_enable;
+static void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) __ro_after_init =
+	watchdog_buddy_hardlockup_disable;
+static int (*watchdog_hardlockup_probe_ptr)(void) __initdata =
+	watchdog_buddy_hardlockup_probe;
+#else
+static void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) __ro_after_init;
+static void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) __ro_after_init;
+static int (*watchdog_hardlockup_probe_ptr)(void) __initdata;
+#endif
+
#ifdef CONFIG_HARDLOCKUP_DETECTOR
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE
+/*
+ * hardlockup_detector=perf|buddy
+ *
+ * Boot-time choice between the two generic detectors when both are built.
+ * The value has to match exactly ("performance" is not "perf"); any other
+ * value is reported and the current selection (perf by default) is kept.
+ */
+static int __init set_hardlockup_detector_type(char *str)
+{
+	if (!strcmp(str, "perf")) {
+		watchdog_hardlockup_enable_ptr = watchdog_perf_hardlockup_enable;
+		watchdog_hardlockup_disable_ptr = watchdog_perf_hardlockup_disable;
+		watchdog_hardlockup_probe_ptr = watchdog_perf_hardlockup_probe;
+	} else if (!strcmp(str, "buddy")) {
+		watchdog_hardlockup_enable_ptr = watchdog_buddy_hardlockup_enable;
+		watchdog_hardlockup_disable_ptr = watchdog_buddy_hardlockup_disable;
+		watchdog_hardlockup_probe_ptr = watchdog_buddy_hardlockup_probe;
+	} else {
+		pr_warn("Unknown hardlockup_detector=%s, ignoring\n", str);
+	}
+
+	/* As before, always report the option as consumed. */
+	return 1;
+}
+__setup("hardlockup_detector=", set_hardlockup_detector_type);
+
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE */
+
# ifdef CONFIG_SMP
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
# endif /* CONFIG_SMP */
@@ -262,9 +293,17 @@ static inline void watchdog_hardlockup_kick(void) { }
* softlockup watchdog start and stop. The detector must select the
* SOFTLOCKUP_DETECTOR Kconfig.
*/
-void __weak watchdog_hardlockup_enable(unsigned int cpu) { }
+void __weak watchdog_hardlockup_enable(unsigned int cpu)
+{
+	/* No detector hook installed: nothing to enable. */
+	if (!watchdog_hardlockup_enable_ptr)
+		return;
+
+	watchdog_hardlockup_enable_ptr(cpu);
+}
-void __weak watchdog_hardlockup_disable(unsigned int cpu) { }
+void __weak watchdog_hardlockup_disable(unsigned int cpu)
+{
+	/* No detector hook installed: nothing to disable. */
+	if (!watchdog_hardlockup_disable_ptr)
+		return;
+
+	watchdog_hardlockup_disable_ptr(cpu);
+}
/*
* Watchdog-detector specific API.
@@ -275,6 +314,9 @@ void __weak watchdog_hardlockup_disable(unsigned int cpu) { }
*/
int __weak __init watchdog_hardlockup_probe(void)
{
+ if (watchdog_hardlockup_probe_ptr)
+ return watchdog_hardlockup_probe_ptr();
+
return -ENODEV;
}
diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c
index ee754d767c21..390d89bfcafa 100644
--- a/kernel/watchdog_buddy.c
+++ b/kernel/watchdog_buddy.c
@@ -19,15 +19,16 @@ static unsigned int watchdog_next_cpu(unsigned int cpu)
return next_cpu;
}
-int __init watchdog_hardlockup_probe(void)
+int __init watchdog_buddy_hardlockup_probe(void)
{
return 0;
}
-void watchdog_hardlockup_enable(unsigned int cpu)
+void watchdog_buddy_hardlockup_enable(unsigned int cpu)
{
unsigned int next_cpu;
+ pr_info("ddddd %s\n", __func__);
/*
* The new CPU will be marked online before the hrtimer interrupt
* gets a chance to run on it. If another CPU tests for a
@@ -58,7 +59,7 @@ void watchdog_hardlockup_enable(unsigned int cpu)
cpumask_set_cpu(cpu, &watchdog_cpus);
}
-void watchdog_hardlockup_disable(unsigned int cpu)
+void watchdog_buddy_hardlockup_disable(unsigned int cpu)
{
unsigned int next_cpu = watchdog_next_cpu(cpu);
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index 9c58f5b4381d..270110e58f20 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -153,10 +153,12 @@ static int hardlockup_detector_event_create(void)
* watchdog_hardlockup_enable - Enable the local event
* @cpu: The CPU to enable hard lockup on.
*/
-void watchdog_hardlockup_enable(unsigned int cpu)
+void watchdog_perf_hardlockup_enable(unsigned int cpu)
{
WARN_ON_ONCE(cpu != smp_processor_id());
+ pr_info("ddddd %s\n", __func__);
+
if (hardlockup_detector_event_create())
return;
@@ -172,7 +174,7 @@ void watchdog_hardlockup_enable(unsigned int cpu)
* watchdog_hardlockup_disable - Disable the local event
* @cpu: The CPU to enable hard lockup on.
*/
-void watchdog_hardlockup_disable(unsigned int cpu)
+void watchdog_perf_hardlockup_disable(unsigned int cpu)
{
struct perf_event *event = this_cpu_read(watchdog_ev);
@@ -257,10 +259,12 @@ bool __weak __init arch_perf_nmi_is_available(void)
/**
* watchdog_hardlockup_probe - Probe whether NMI event is available at all
*/
-int __init watchdog_hardlockup_probe(void)
+int __init watchdog_perf_hardlockup_probe(void)
{
int ret;
+ pr_info("ddddd %s\n", __func__);
+
if (!arch_perf_nmi_is_available())
return -ENODEV;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dc0e0c6ed075..443353fad1c1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1167,36 +1167,33 @@ config HARDLOCKUP_DETECTOR
#
# Note that arch-specific variants are always preferred.
#
-config HARDLOCKUP_DETECTOR_PREFER_BUDDY
- bool "Prefer the buddy CPU hardlockup detector"
- depends on HARDLOCKUP_DETECTOR
- depends on HAVE_HARDLOCKUP_DETECTOR_PERF && HAVE_HARDLOCKUP_DETECTOR_BUDDY
- depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH
- help
- Say Y here to prefer the buddy hardlockup detector over the perf one.
-
- With the buddy detector, each CPU uses its softlockup hrtimer
- to check that the next CPU is processing hrtimer interrupts by
- verifying that a counter is increasing.
-
- This hardlockup detector is useful on systems that don't have
- an arch-specific hardlockup detector or if resources needed
- for the hardlockup detector are better used for other things.
-
config HARDLOCKUP_DETECTOR_PERF
- bool
+ bool "Enable perf-based hard lockup detector (preferred)"
depends on HARDLOCKUP_DETECTOR
- depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY
+ depends on HAVE_HARDLOCKUP_DETECTOR_PERF
depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER
+ help
+	  This detector uses a perf event on each CPU to raise a periodic
+	  non-maskable interrupt (NMI) that checks whether that CPU is stuck.
+	  This is the preferred method on modern systems: every CPU checks
+	  itself, so lockups on all CPUs at the same time can be detected.
config HARDLOCKUP_DETECTOR_BUDDY
- bool
+ bool "Enable buddy-based hard lockup detector"
depends on HARDLOCKUP_DETECTOR
depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY
- depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY
depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER
+ help
+ This is an alternative lockup detector that uses a heartbeat
+ mechanism between CPUs to detect when one has stopped responding.
+ It is less precise than the perf-based detector and cannot detect
+ all-CPU lockups, but it does not require a perf counter.
+
+# Hidden symbol, set automatically when both generic detectors are built.
+# kernel/watchdog.c tests CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE to provide the
+# hardlockup_detector= boot parameter. Kconfig adds the CONFIG_ prefix itself,
+# so the symbol must be named without it.
+config HARDLOCKUP_DETECTOR_MULTIPLE
+	def_bool y
+	depends on HARDLOCKUP_DETECTOR_PERF && HARDLOCKUP_DETECTOR_BUDDY
config HARDLOCKUP_DETECTOR_ARCH
bool
--
2.43.0
Le 16/09/2025 à 16:50, Jinchao Wang a écrit : > Currently, the hard lockup detector is selected at compile time via > Kconfig, which requires a kernel rebuild to switch implementations. > This is inflexible, especially on systems where a perf event may not > be available or may be needed for other tasks. > > This commit refactors the hard lockup detector to replace a rigid > compile-time choice with a flexible build-time and boot-time solution. > The patch supports building the kernel with either detector > independently, or with both. When both are built, a new boot parameter > `hardlockup_detector="perf|buddy"` allows the selection at boot time. > This is a more robust and user-friendly design. > > This patch is a follow-up to the discussion on the kernel mailing list > regarding the preference and future of the hard lockup detectors. It > implements a flexible solution that addresses the community's need to > select an appropriate detector at boot time. > > The core changes are: > - The `perf` and `buddy` watchdog implementations are separated into > distinct functions (e.g., `watchdog_perf_hardlockup_enable`). > - Global function pointers are introduced (`watchdog_hardlockup_enable_ptr`) > to serve as a single API for the entire feature. > - A new `hardlockup_detector=` boot parameter is added to allow the > user to select the desired detector at boot time. > - The Kconfig options are simplified by removing the complex > `HARDLOCKUP_DETECTOR_PREFER_BUDDY` and allowing both detectors to be > built without mutual exclusion. > - The weak stubs are updated to call the new function pointers, > centralizing the watchdog logic. 
> > Link: https://lore.kernel.org/all/20250915035355.10846-1-cuiyunhui@bytedance.com/ > Link: https://lore.kernel.org/all/CAD=FV=WWUiCi6bZCs_gseFpDDWNkuJMoL6XCftEo6W7q6jRCkg@mail.gmail.com/ > > Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com> > --- > .../admin-guide/kernel-parameters.txt | 7 +++ > include/linux/nmi.h | 6 +++ > kernel/watchdog.c | 46 ++++++++++++++++++- > kernel/watchdog_buddy.c | 7 +-- > kernel/watchdog_perf.c | 10 ++-- > lib/Kconfig.debug | 37 +++++++-------- > 6 files changed, 85 insertions(+), 28 deletions(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 5a7a83c411e9..0af214ee566c 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -1828,6 +1828,13 @@ > backtraces on all cpus. > Format: 0 | 1 > > + hardlockup_detector= > + [perf, buddy] Selects the hard lockup detector to use at > + boot time. > + Format: <string> > + - "perf": Use the perf-based detector. > + - "buddy": Use the buddy-based detector. > + > hash_pointers= > [KNL,EARLY] > By default, when pointers are printed to the console > diff --git a/include/linux/nmi.h b/include/linux/nmi.h > index cf3c6ab408aa..9298980ce572 100644 > --- a/include/linux/nmi.h > +++ b/include/linux/nmi.h > @@ -100,6 +100,9 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); > #endif > > #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) > +void watchdog_perf_hardlockup_enable(unsigned int cpu); > +void watchdog_perf_hardlockup_disable(unsigned int cpu); > +extern int watchdog_perf_hardlockup_probe(void); No 'extern' on function prototypes, this is pointless. 
> extern void hardlockup_detector_perf_stop(void); > extern void hardlockup_detector_perf_restart(void); > extern void hardlockup_config_perf_event(const char *str); > @@ -120,6 +123,9 @@ void watchdog_hardlockup_disable(unsigned int cpu); > void lockup_detector_reconfigure(void); > > #ifdef CONFIG_HARDLOCKUP_DETECTOR_BUDDY > +void watchdog_buddy_hardlockup_enable(unsigned int cpu); > +void watchdog_buddy_hardlockup_disable(unsigned int cpu); > +int watchdog_buddy_hardlockup_probe(void); > void watchdog_buddy_check_hardlockup(int hrtimer_interrupts); > #else > static inline void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) {} > diff --git a/kernel/watchdog.c b/kernel/watchdog.c > index 80b56c002c7f..85451d24a77d 100644 > --- a/kernel/watchdog.c > +++ b/kernel/watchdog.c > @@ -55,6 +55,37 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); > > #ifdef CONFIG_HARDLOCKUP_DETECTOR > > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF > +/* The global function pointers */ > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_enable; > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_disable; > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_perf_hardlockup_probe; As this is set only once at startup, can we use static_call instead of function pointers ? Also, can it me made __ro_after_init ? > +#elif defined(CONFIG_HARDLOCKUP_DETECTOR_BUDDY) > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_enable; > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_disable; > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_buddy_hardlockup_probe; > +#endif > + > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > +static char *hardlockup_detector_type = "perf"; /* Default to perf */ > +static int __init set_hardlockup_detector_type(char *str) > +{ > + if (!strncmp(str, "perf", 4)) { Why strncmp ? 
What if I set 'hardlockup_detector=performance" ? > + watchdog_hardlockup_enable_ptr = watchdog_perf_hardlockup_enable; > + watchdog_hardlockup_disable_ptr = watchdog_perf_hardlockup_disable; > + watchdog_hardlockup_probe_ptr = watchdog_perf_hardlockup_probe; > + } else if (!strncmp(str, "buddy", 5)) { > + watchdog_hardlockup_enable_ptr = watchdog_buddy_hardlockup_enable; > + watchdog_hardlockup_disable_ptr = watchdog_buddy_hardlockup_disable; > + watchdog_hardlockup_probe_ptr = watchdog_buddy_hardlockup_probe; > + } > + return 1; > +} > + > +__setup("hardlockup_detector=", set_hardlockup_detector_type); > + > +#endif > + > # ifdef CONFIG_SMP > int __read_mostly sysctl_hardlockup_all_cpu_backtrace; > # endif /* CONFIG_SMP */ > @@ -262,9 +293,17 @@ static inline void watchdog_hardlockup_kick(void) { } > * softlockup watchdog start and stop. The detector must select the > * SOFTLOCKUP_DETECTOR Kconfig. > */ > -void __weak watchdog_hardlockup_enable(unsigned int cpu) { } > +void __weak watchdog_hardlockup_enable(unsigned int cpu) > +{ > + if (watchdog_hardlockup_enable_ptr) > + watchdog_hardlockup_enable_ptr(cpu); > +} This is a weak function so it can be overloaded. What happens then, for instance if the sparc architecture version of watchdog_hardlockup_enable() is called instead ? > > -void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > +void __weak watchdog_hardlockup_disable(unsigned int cpu) > +{ > + if (watchdog_hardlockup_disable_ptr) > + watchdog_hardlockup_disable_ptr(cpu); > +} > > /* > * Watchdog-detector specific API. 
> @@ -275,6 +314,9 @@ void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > */ > int __weak __init watchdog_hardlockup_probe(void) > { > + if (watchdog_hardlockup_probe_ptr) > + return watchdog_hardlockup_probe_ptr(); > + > return -ENODEV; > } > > diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c > index ee754d767c21..390d89bfcafa 100644 > --- a/kernel/watchdog_buddy.c > +++ b/kernel/watchdog_buddy.c > @@ -19,15 +19,16 @@ static unsigned int watchdog_next_cpu(unsigned int cpu) > return next_cpu; > } > > -int __init watchdog_hardlockup_probe(void) > +int __init watchdog_buddy_hardlockup_probe(void) > { > return 0; > } > > -void watchdog_hardlockup_enable(unsigned int cpu) > +void watchdog_buddy_hardlockup_enable(unsigned int cpu) > { > unsigned int next_cpu; > > + pr_info("ddddd %s\n", __func__); Leftover from debuging ? > /* > * The new CPU will be marked online before the hrtimer interrupt > * gets a chance to run on it. If another CPU tests for a > @@ -58,7 +59,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > cpumask_set_cpu(cpu, &watchdog_cpus); > } > > -void watchdog_hardlockup_disable(unsigned int cpu) > +void watchdog_buddy_hardlockup_disable(unsigned int cpu) > { > unsigned int next_cpu = watchdog_next_cpu(cpu); > > diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c > index 9c58f5b4381d..270110e58f20 100644 > --- a/kernel/watchdog_perf.c > +++ b/kernel/watchdog_perf.c > @@ -153,10 +153,12 @@ static int hardlockup_detector_event_create(void) > * watchdog_hardlockup_enable - Enable the local event > * @cpu: The CPU to enable hard lockup on. 
> */ > -void watchdog_hardlockup_enable(unsigned int cpu) > +void watchdog_perf_hardlockup_enable(unsigned int cpu) > { > WARN_ON_ONCE(cpu != smp_processor_id()); > > + pr_info("ddddd %s\n", __func__); > + > if (hardlockup_detector_event_create()) > return; > > @@ -172,7 +174,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > * watchdog_hardlockup_disable - Disable the local event > * @cpu: The CPU to enable hard lockup on. > */ > -void watchdog_hardlockup_disable(unsigned int cpu) > +void watchdog_perf_hardlockup_disable(unsigned int cpu) > { > struct perf_event *event = this_cpu_read(watchdog_ev); > > @@ -257,10 +259,12 @@ bool __weak __init arch_perf_nmi_is_available(void) > /** > * watchdog_hardlockup_probe - Probe whether NMI event is available at all > */ > -int __init watchdog_hardlockup_probe(void) > +int __init watchdog_perf_hardlockup_probe(void) > { > int ret; > > + pr_info("ddddd %s\n", __func__); > + > if (!arch_perf_nmi_is_available()) > return -ENODEV; > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > index dc0e0c6ed075..443353fad1c1 100644 > --- a/lib/Kconfig.debug > +++ b/lib/Kconfig.debug > @@ -1167,36 +1167,33 @@ config HARDLOCKUP_DETECTOR > # > # Note that arch-specific variants are always preferred. > # > -config HARDLOCKUP_DETECTOR_PREFER_BUDDY > - bool "Prefer the buddy CPU hardlockup detector" > - depends on HARDLOCKUP_DETECTOR > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && HAVE_HARDLOCKUP_DETECTOR_BUDDY > - depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > - help > - Say Y here to prefer the buddy hardlockup detector over the perf one. > - > - With the buddy detector, each CPU uses its softlockup hrtimer > - to check that the next CPU is processing hrtimer interrupts by > - verifying that a counter is increasing. > - > - This hardlockup detector is useful on systems that don't have > - an arch-specific hardlockup detector or if resources needed > - for the hardlockup detector are better used for other things. 
> - > config HARDLOCKUP_DETECTOR_PERF > - bool > + bool "Enable perf-based hard lockup detector (preferred)" > depends on HARDLOCKUP_DETECTOR > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY > + depends on HAVE_HARDLOCKUP_DETECTOR_PERF > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > + help > + This detector uses a perf event on the CPU to detect when a CPU > + has become non-maskable interrupt (NMI) stuck. This is the > + preferred method on modern systems as it can detect lockups on > + all CPUs at the same time. > > config HARDLOCKUP_DETECTOR_BUDDY > - bool > + bool "Enable buddy-based hard lockup detector" > depends on HARDLOCKUP_DETECTOR > depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY > - depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > + help > + This is an alternative lockup detector that uses a heartbeat > + mechanism between CPUs to detect when one has stopped responding. > + It is less precise than the perf-based detector and cannot detect > + all-CPU lockups, but it does not require a perf counter. > + > +config CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > + bool > + depends on HARDLOCKUP_DETECTOR_PERF && HARDLOCKUP_DETECTOR_BUDDY > > config HARDLOCKUP_DETECTOR_ARCH > bool
On Wed, Sep 17, 2025 at 08:08:57AM +0200, Christophe Leroy wrote: > > > Le 16/09/2025 à 16:50, Jinchao Wang a écrit : > > Currently, the hard lockup detector is selected at compile time via > > Kconfig, which requires a kernel rebuild to switch implementations. > > This is inflexible, especially on systems where a perf event may not > > be available or may be needed for other tasks. > > > > This commit refactors the hard lockup detector to replace a rigid > > compile-time choice with a flexible build-time and boot-time solution. > > The patch supports building the kernel with either detector > > independently, or with both. When both are built, a new boot parameter > > `hardlockup_detector="perf|buddy"` allows the selection at boot time. > > This is a more robust and user-friendly design. > > > > This patch is a follow-up to the discussion on the kernel mailing list > > regarding the preference and future of the hard lockup detectors. It > > implements a flexible solution that addresses the community's need to > > select an appropriate detector at boot time. > > > > The core changes are: > > - The `perf` and `buddy` watchdog implementations are separated into > > distinct functions (e.g., `watchdog_perf_hardlockup_enable`). > > - Global function pointers are introduced (`watchdog_hardlockup_enable_ptr`) > > to serve as a single API for the entire feature. > > - A new `hardlockup_detector=` boot parameter is added to allow the > > user to select the desired detector at boot time. > > - The Kconfig options are simplified by removing the complex > > `HARDLOCKUP_DETECTOR_PREFER_BUDDY` and allowing both detectors to be > > built without mutual exclusion. > > - The weak stubs are updated to call the new function pointers, > > centralizing the watchdog logic. 
> > > > Link: https://lore.kernel.org/all/20250915035355.10846-1-cuiyunhui@bytedance.com/ > > Link: https://lore.kernel.org/all/CAD=FV=WWUiCi6bZCs_gseFpDDWNkuJMoL6XCftEo6W7q6jRCkg@mail.gmail.com/ > > > > Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com> > > --- > > .../admin-guide/kernel-parameters.txt | 7 +++ > > include/linux/nmi.h | 6 +++ > > kernel/watchdog.c | 46 ++++++++++++++++++- > > kernel/watchdog_buddy.c | 7 +-- > > kernel/watchdog_perf.c | 10 ++-- > > lib/Kconfig.debug | 37 +++++++-------- > > 6 files changed, 85 insertions(+), 28 deletions(-) > > > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > > index 5a7a83c411e9..0af214ee566c 100644 > > --- a/Documentation/admin-guide/kernel-parameters.txt > > +++ b/Documentation/admin-guide/kernel-parameters.txt > > @@ -1828,6 +1828,13 @@ > > backtraces on all cpus. > > Format: 0 | 1 > > + hardlockup_detector= > > + [perf, buddy] Selects the hard lockup detector to use at > > + boot time. > > + Format: <string> > > + - "perf": Use the perf-based detector. > > + - "buddy": Use the buddy-based detector. > > + > > hash_pointers= > > [KNL,EARLY] > > By default, when pointers are printed to the console > > diff --git a/include/linux/nmi.h b/include/linux/nmi.h > > index cf3c6ab408aa..9298980ce572 100644 > > --- a/include/linux/nmi.h > > +++ b/include/linux/nmi.h > > @@ -100,6 +100,9 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); > > #endif > > #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) > > +void watchdog_perf_hardlockup_enable(unsigned int cpu); > > +void watchdog_perf_hardlockup_disable(unsigned int cpu); > > +extern int watchdog_perf_hardlockup_probe(void); > > No 'extern' on function prototypes, this is pointless. Got it. 
> > > extern void hardlockup_detector_perf_stop(void); > > extern void hardlockup_detector_perf_restart(void); > > extern void hardlockup_config_perf_event(const char *str); > > @@ -120,6 +123,9 @@ void watchdog_hardlockup_disable(unsigned int cpu); > > void lockup_detector_reconfigure(void); > > #ifdef CONFIG_HARDLOCKUP_DETECTOR_BUDDY > > +void watchdog_buddy_hardlockup_enable(unsigned int cpu); > > +void watchdog_buddy_hardlockup_disable(unsigned int cpu); > > +int watchdog_buddy_hardlockup_probe(void); > > void watchdog_buddy_check_hardlockup(int hrtimer_interrupts); > > #else > > static inline void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) {} > > diff --git a/kernel/watchdog.c b/kernel/watchdog.c > > index 80b56c002c7f..85451d24a77d 100644 > > --- a/kernel/watchdog.c > > +++ b/kernel/watchdog.c > > @@ -55,6 +55,37 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); > > #ifdef CONFIG_HARDLOCKUP_DETECTOR > > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF > > +/* The global function pointers */ > > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_enable; > > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_disable; > > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_perf_hardlockup_probe; > > As this is set only once at startup, can we use static_call instead of > function pointers ? > > Also, can it me made __ro_after_init ? Not really, this is just an RFC patch, and there is no consensus yet. If it is included in the final consensus, I will handle it in the next version. 
> > > +#elif defined(CONFIG_HARDLOCKUP_DETECTOR_BUDDY) > > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_enable; > > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_disable; > > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_buddy_hardlockup_probe; > > +#endif > > + > > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > > +static char *hardlockup_detector_type = "perf"; /* Default to perf */ > > +static int __init set_hardlockup_detector_type(char *str) > > +{ > > + if (!strncmp(str, "perf", 4)) { > > Why strncmp ? Copy from hardlockup_panic_setup(). > > What if I set 'hardlockup_detector=performance" ? I think that is acceptable in this case. > > > > + watchdog_hardlockup_enable_ptr = watchdog_perf_hardlockup_enable; > > + watchdog_hardlockup_disable_ptr = watchdog_perf_hardlockup_disable; > > + watchdog_hardlockup_probe_ptr = watchdog_perf_hardlockup_probe; > > + } else if (!strncmp(str, "buddy", 5)) { > > + watchdog_hardlockup_enable_ptr = watchdog_buddy_hardlockup_enable; > > + watchdog_hardlockup_disable_ptr = watchdog_buddy_hardlockup_disable; > > + watchdog_hardlockup_probe_ptr = watchdog_buddy_hardlockup_probe; > > + } > > + return 1; > > +} > > + > > +__setup("hardlockup_detector=", set_hardlockup_detector_type); > > + > > +#endif > > + > > # ifdef CONFIG_SMP > > int __read_mostly sysctl_hardlockup_all_cpu_backtrace; > > # endif /* CONFIG_SMP */ > > @@ -262,9 +293,17 @@ static inline void watchdog_hardlockup_kick(void) { } > > * softlockup watchdog start and stop. The detector must select the > > * SOFTLOCKUP_DETECTOR Kconfig. > > */ > > -void __weak watchdog_hardlockup_enable(unsigned int cpu) { } > > +void __weak watchdog_hardlockup_enable(unsigned int cpu) > > +{ > > + if (watchdog_hardlockup_enable_ptr) > > + watchdog_hardlockup_enable_ptr(cpu); > > +} > > This is a weak function so it can be overloaded. 
What happens then, for > instance if the sparc architecture version of watchdog_hardlockup_enable() > is called instead ? It is a historical problem; I prefer using an #if condition instead. I had considered sparc arch, if sparc version is called, it is expected. Because the __weak functions only handle perf & buddy watchdog not the sparc watchdog. I think we should first resolve the consensus issue: - Should we keep both perf and buddy watchdogs? (probably yes already) - Should the watchdog type be changeable at boot time? - Should the watchdog type be changeable at runtime? How we handle these different watchdog types(maybe including sparc type) depends on the answers to these questions. What do you think? > > > -void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > > +void __weak watchdog_hardlockup_disable(unsigned int cpu) > > +{ > > + if (watchdog_hardlockup_disable_ptr) > > + watchdog_hardlockup_disable_ptr(cpu); > > +} > > /* > > * Watchdog-detector specific API. > > @@ -275,6 +314,9 @@ void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > > */ > > int __weak __init watchdog_hardlockup_probe(void) > > { > > + if (watchdog_hardlockup_probe_ptr) > > + return watchdog_hardlockup_probe_ptr(); > > + > > return -ENODEV; > > } > > diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c > > index ee754d767c21..390d89bfcafa 100644 > > --- a/kernel/watchdog_buddy.c > > +++ b/kernel/watchdog_buddy.c > > @@ -19,15 +19,16 @@ static unsigned int watchdog_next_cpu(unsigned int cpu) > > return next_cpu; > > } > > -int __init watchdog_hardlockup_probe(void) > > +int __init watchdog_buddy_hardlockup_probe(void) > > { > > return 0; > > } > > -void watchdog_hardlockup_enable(unsigned int cpu) > > +void watchdog_buddy_hardlockup_enable(unsigned int cpu) > > { > > unsigned int next_cpu; > > + pr_info("ddddd %s\n", __func__); > > Leftover from debuging ? Forgot to delete the log, will fix if a v2 is needed. 
> > > /* > > * The new CPU will be marked online before the hrtimer interrupt > > * gets a chance to run on it. If another CPU tests for a > > @@ -58,7 +59,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > > cpumask_set_cpu(cpu, &watchdog_cpus); > > } > > -void watchdog_hardlockup_disable(unsigned int cpu) > > +void watchdog_buddy_hardlockup_disable(unsigned int cpu) > > { > > unsigned int next_cpu = watchdog_next_cpu(cpu); > > diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c > > index 9c58f5b4381d..270110e58f20 100644 > > --- a/kernel/watchdog_perf.c > > +++ b/kernel/watchdog_perf.c > > @@ -153,10 +153,12 @@ static int hardlockup_detector_event_create(void) > > * watchdog_hardlockup_enable - Enable the local event > > * @cpu: The CPU to enable hard lockup on. > > */ > > -void watchdog_hardlockup_enable(unsigned int cpu) > > +void watchdog_perf_hardlockup_enable(unsigned int cpu) > > { > > WARN_ON_ONCE(cpu != smp_processor_id()); > > + pr_info("ddddd %s\n", __func__); > > + > > if (hardlockup_detector_event_create()) > > return; > > @@ -172,7 +174,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > > * watchdog_hardlockup_disable - Disable the local event > > * @cpu: The CPU to enable hard lockup on. 
> > */ > > -void watchdog_hardlockup_disable(unsigned int cpu) > > +void watchdog_perf_hardlockup_disable(unsigned int cpu) > > { > > struct perf_event *event = this_cpu_read(watchdog_ev); > > @@ -257,10 +259,12 @@ bool __weak __init arch_perf_nmi_is_available(void) > > /** > > * watchdog_hardlockup_probe - Probe whether NMI event is available at all > > */ > > -int __init watchdog_hardlockup_probe(void) > > +int __init watchdog_perf_hardlockup_probe(void) > > { > > int ret; > > + pr_info("ddddd %s\n", __func__); > > + > > if (!arch_perf_nmi_is_available()) > > return -ENODEV; > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > > index dc0e0c6ed075..443353fad1c1 100644 > > --- a/lib/Kconfig.debug > > +++ b/lib/Kconfig.debug > > @@ -1167,36 +1167,33 @@ config HARDLOCKUP_DETECTOR > > # > > # Note that arch-specific variants are always preferred. > > # > > -config HARDLOCKUP_DETECTOR_PREFER_BUDDY > > - bool "Prefer the buddy CPU hardlockup detector" > > - depends on HARDLOCKUP_DETECTOR > > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && HAVE_HARDLOCKUP_DETECTOR_BUDDY > > - depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > - help > > - Say Y here to prefer the buddy hardlockup detector over the perf one. > > - > > - With the buddy detector, each CPU uses its softlockup hrtimer > > - to check that the next CPU is processing hrtimer interrupts by > > - verifying that a counter is increasing. > > - > > - This hardlockup detector is useful on systems that don't have > > - an arch-specific hardlockup detector or if resources needed > > - for the hardlockup detector are better used for other things. 
> > - > > config HARDLOCKUP_DETECTOR_PERF > > - bool > > + bool "Enable perf-based hard lockup detector (preferred)" > > depends on HARDLOCKUP_DETECTOR > > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY > > + depends on HAVE_HARDLOCKUP_DETECTOR_PERF > > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > > + help > > + This detector uses a perf event on the CPU to detect when a CPU > > + has become non-maskable interrupt (NMI) stuck. This is the > > + preferred method on modern systems as it can detect lockups on > > + all CPUs at the same time. > > config HARDLOCKUP_DETECTOR_BUDDY > > - bool > > + bool "Enable buddy-based hard lockup detector" > > depends on HARDLOCKUP_DETECTOR > > depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY > > - depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY > > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > > + help > > + This is an alternative lockup detector that uses a heartbeat > > + mechanism between CPUs to detect when one has stopped responding. > > + It is less precise than the perf-based detector and cannot detect > > + all-CPU lockups, but it does not require a perf counter. > > + > > +config CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > > + bool > > + depends on HARDLOCKUP_DETECTOR_PERF && HARDLOCKUP_DETECTOR_BUDDY > > config HARDLOCKUP_DETECTOR_ARCH > > bool >
On Tue, Sep 16, 2025 at 7:51 AM Jinchao Wang <wangjinchao600@gmail.com> wrote: > > Currently, the hard lockup detector is selected at compile time via > Kconfig, which requires a kernel rebuild to switch implementations. > This is inflexible, especially on systems where a perf event may not > be available or may be needed for other tasks. > > This commit refactors the hard lockup detector to replace a rigid > compile-time choice with a flexible build-time and boot-time solution. > The patch supports building the kernel with either detector > independently, or with both. When both are built, a new boot parameter > `hardlockup_detector="perf|buddy"` allows the selection at boot time. > This is a more robust and user-friendly design. > > This patch is a follow-up to the discussion on the kernel mailing list > regarding the preference and future of the hard lockup detectors. It > implements a flexible solution that addresses the community's need to > select an appropriate detector at boot time. > > The core changes are: > - The `perf` and `buddy` watchdog implementations are separated into > distinct functions (e.g., `watchdog_perf_hardlockup_enable`). > - Global function pointers are introduced (`watchdog_hardlockup_enable_ptr`) > to serve as a single API for the entire feature. > - A new `hardlockup_detector=` boot parameter is added to allow the > user to select the desired detector at boot time. > - The Kconfig options are simplified by removing the complex > `HARDLOCKUP_DETECTOR_PREFER_BUDDY` and allowing both detectors to be > built without mutual exclusion. > - The weak stubs are updated to call the new function pointers, > centralizing the watchdog logic. What is the impact on /proc/sys/kernel/nmi_watchdog ? Is that enabling and disabling whatever the boot time choice was? I'm not sure why this has to be a boot time option given the ability to configure via /proc/sys/kernel/nmi_watchdog. 
> Link: https://lore.kernel.org/all/20250915035355.10846-1-cuiyunhui@bytedance.com/ > Link: https://lore.kernel.org/all/CAD=FV=WWUiCi6bZCs_gseFpDDWNkuJMoL6XCftEo6W7q6jRCkg@mail.gmail.com/ > > Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com> > --- > .../admin-guide/kernel-parameters.txt | 7 +++ > include/linux/nmi.h | 6 +++ > kernel/watchdog.c | 46 ++++++++++++++++++- > kernel/watchdog_buddy.c | 7 +-- > kernel/watchdog_perf.c | 10 ++-- > lib/Kconfig.debug | 37 +++++++-------- > 6 files changed, 85 insertions(+), 28 deletions(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 5a7a83c411e9..0af214ee566c 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -1828,6 +1828,13 @@ > backtraces on all cpus. > Format: 0 | 1 > > + hardlockup_detector= > + [perf, buddy] Selects the hard lockup detector to use at > + boot time. > + Format: <string> > + - "perf": Use the perf-based detector. > + - "buddy": Use the buddy-based detector. 
> + > hash_pointers= > [KNL,EARLY] > By default, when pointers are printed to the console > diff --git a/include/linux/nmi.h b/include/linux/nmi.h > index cf3c6ab408aa..9298980ce572 100644 > --- a/include/linux/nmi.h > +++ b/include/linux/nmi.h > @@ -100,6 +100,9 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); > #endif > > #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) > +void watchdog_perf_hardlockup_enable(unsigned int cpu); > +void watchdog_perf_hardlockup_disable(unsigned int cpu); > +extern int watchdog_perf_hardlockup_probe(void); > extern void hardlockup_detector_perf_stop(void); > extern void hardlockup_detector_perf_restart(void); > extern void hardlockup_config_perf_event(const char *str); > @@ -120,6 +123,9 @@ void watchdog_hardlockup_disable(unsigned int cpu); > void lockup_detector_reconfigure(void); > > #ifdef CONFIG_HARDLOCKUP_DETECTOR_BUDDY > +void watchdog_buddy_hardlockup_enable(unsigned int cpu); > +void watchdog_buddy_hardlockup_disable(unsigned int cpu); > +int watchdog_buddy_hardlockup_probe(void); > void watchdog_buddy_check_hardlockup(int hrtimer_interrupts); > #else > static inline void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) {} > diff --git a/kernel/watchdog.c b/kernel/watchdog.c > index 80b56c002c7f..85451d24a77d 100644 > --- a/kernel/watchdog.c > +++ b/kernel/watchdog.c > @@ -55,6 +55,37 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); > > #ifdef CONFIG_HARDLOCKUP_DETECTOR > > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF > +/* The global function pointers */ > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_enable; > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_disable; > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_perf_hardlockup_probe; > +#elif defined(CONFIG_HARDLOCKUP_DETECTOR_BUDDY) > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_enable; > +void 
(*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_disable; > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_buddy_hardlockup_probe; > +#endif > + > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > +static char *hardlockup_detector_type = "perf"; /* Default to perf */ > +static int __init set_hardlockup_detector_type(char *str) > +{ > + if (!strncmp(str, "perf", 4)) { > + watchdog_hardlockup_enable_ptr = watchdog_perf_hardlockup_enable; > + watchdog_hardlockup_disable_ptr = watchdog_perf_hardlockup_disable; > + watchdog_hardlockup_probe_ptr = watchdog_perf_hardlockup_probe; > + } else if (!strncmp(str, "buddy", 5)) { > + watchdog_hardlockup_enable_ptr = watchdog_buddy_hardlockup_enable; > + watchdog_hardlockup_disable_ptr = watchdog_buddy_hardlockup_disable; > + watchdog_hardlockup_probe_ptr = watchdog_buddy_hardlockup_probe; > + } > + return 1; > +} > + > +__setup("hardlockup_detector=", set_hardlockup_detector_type); > + > +#endif > + > # ifdef CONFIG_SMP > int __read_mostly sysctl_hardlockup_all_cpu_backtrace; > # endif /* CONFIG_SMP */ > @@ -262,9 +293,17 @@ static inline void watchdog_hardlockup_kick(void) { } > * softlockup watchdog start and stop. The detector must select the > * SOFTLOCKUP_DETECTOR Kconfig. > */ > -void __weak watchdog_hardlockup_enable(unsigned int cpu) { } > +void __weak watchdog_hardlockup_enable(unsigned int cpu) > +{ > + if (watchdog_hardlockup_enable_ptr) > + watchdog_hardlockup_enable_ptr(cpu); > +} > > -void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > +void __weak watchdog_hardlockup_disable(unsigned int cpu) > +{ > + if (watchdog_hardlockup_disable_ptr) > + watchdog_hardlockup_disable_ptr(cpu); > +} > > /* > * Watchdog-detector specific API. 
> @@ -275,6 +314,9 @@ void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > */ > int __weak __init watchdog_hardlockup_probe(void) > { > + if (watchdog_hardlockup_probe_ptr) > + return watchdog_hardlockup_probe_ptr(); > + > return -ENODEV; > } > > diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c > index ee754d767c21..390d89bfcafa 100644 > --- a/kernel/watchdog_buddy.c > +++ b/kernel/watchdog_buddy.c > @@ -19,15 +19,16 @@ static unsigned int watchdog_next_cpu(unsigned int cpu) > return next_cpu; > } > > -int __init watchdog_hardlockup_probe(void) > +int __init watchdog_buddy_hardlockup_probe(void) > { > return 0; > } > > -void watchdog_hardlockup_enable(unsigned int cpu) > +void watchdog_buddy_hardlockup_enable(unsigned int cpu) > { > unsigned int next_cpu; > > + pr_info("ddddd %s\n", __func__); > /* > * The new CPU will be marked online before the hrtimer interrupt > * gets a chance to run on it. If another CPU tests for a > @@ -58,7 +59,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > cpumask_set_cpu(cpu, &watchdog_cpus); > } > > -void watchdog_hardlockup_disable(unsigned int cpu) > +void watchdog_buddy_hardlockup_disable(unsigned int cpu) > { > unsigned int next_cpu = watchdog_next_cpu(cpu); > > diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c > index 9c58f5b4381d..270110e58f20 100644 > --- a/kernel/watchdog_perf.c > +++ b/kernel/watchdog_perf.c > @@ -153,10 +153,12 @@ static int hardlockup_detector_event_create(void) > * watchdog_hardlockup_enable - Enable the local event > * @cpu: The CPU to enable hard lockup on. 
> */ > -void watchdog_hardlockup_enable(unsigned int cpu) > +void watchdog_perf_hardlockup_enable(unsigned int cpu) > { > WARN_ON_ONCE(cpu != smp_processor_id()); > > + pr_info("ddddd %s\n", __func__); > + > if (hardlockup_detector_event_create()) > return; > > @@ -172,7 +174,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > * watchdog_hardlockup_disable - Disable the local event > * @cpu: The CPU to enable hard lockup on. > */ > -void watchdog_hardlockup_disable(unsigned int cpu) > +void watchdog_perf_hardlockup_disable(unsigned int cpu) > { > struct perf_event *event = this_cpu_read(watchdog_ev); > > @@ -257,10 +259,12 @@ bool __weak __init arch_perf_nmi_is_available(void) > /** > * watchdog_hardlockup_probe - Probe whether NMI event is available at all > */ > -int __init watchdog_hardlockup_probe(void) > +int __init watchdog_perf_hardlockup_probe(void) > { > int ret; > > + pr_info("ddddd %s\n", __func__); > + > if (!arch_perf_nmi_is_available()) > return -ENODEV; > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > index dc0e0c6ed075..443353fad1c1 100644 > --- a/lib/Kconfig.debug > +++ b/lib/Kconfig.debug > @@ -1167,36 +1167,33 @@ config HARDLOCKUP_DETECTOR > # > # Note that arch-specific variants are always preferred. > # > -config HARDLOCKUP_DETECTOR_PREFER_BUDDY > - bool "Prefer the buddy CPU hardlockup detector" > - depends on HARDLOCKUP_DETECTOR > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && HAVE_HARDLOCKUP_DETECTOR_BUDDY > - depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > - help > - Say Y here to prefer the buddy hardlockup detector over the perf one. > - > - With the buddy detector, each CPU uses its softlockup hrtimer > - to check that the next CPU is processing hrtimer interrupts by > - verifying that a counter is increasing. > - > - This hardlockup detector is useful on systems that don't have > - an arch-specific hardlockup detector or if resources needed > - for the hardlockup detector are better used for other things. 
> - > config HARDLOCKUP_DETECTOR_PERF > - bool > + bool "Enable perf-based hard lockup detector (preferred)" > depends on HARDLOCKUP_DETECTOR > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY > + depends on HAVE_HARDLOCKUP_DETECTOR_PERF > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > + help > + This detector uses a perf event on the CPU to detect when a CPU > + has become non-maskable interrupt (NMI) stuck. This is the > + preferred method on modern systems as it can detect lockups on > + all CPUs at the same time. I'd say this option should be the default for kernel developers but shouldn't be used by default to free the perf event and due to the extra power overhead. Thanks, Ian > config HARDLOCKUP_DETECTOR_BUDDY > - bool > + bool "Enable buddy-based hard lockup detector" > depends on HARDLOCKUP_DETECTOR > depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY > - depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > + help > + This is an alternative lockup detector that uses a heartbeat > + mechanism between CPUs to detect when one has stopped responding. > + It is less precise than the perf-based detector and cannot detect > + all-CPU lockups, but it does not require a perf counter. > + > +config CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > + bool > + depends on HARDLOCKUP_DETECTOR_PERF && HARDLOCKUP_DETECTOR_BUDDY > > config HARDLOCKUP_DETECTOR_ARCH > bool > -- > 2.43.0 >
On Tue, Sep 16, 2025 at 05:03:48PM -0700, Ian Rogers wrote: > On Tue, Sep 16, 2025 at 7:51 AM Jinchao Wang <wangjinchao600@gmail.com> wrote: > > > > Currently, the hard lockup detector is selected at compile time via > > Kconfig, which requires a kernel rebuild to switch implementations. > > This is inflexible, especially on systems where a perf event may not > > be available or may be needed for other tasks. > > > > This commit refactors the hard lockup detector to replace a rigid > > compile-time choice with a flexible build-time and boot-time solution. > > The patch supports building the kernel with either detector > > independently, or with both. When both are built, a new boot parameter > > `hardlockup_detector="perf|buddy"` allows the selection at boot time. > > This is a more robust and user-friendly design. > > > > This patch is a follow-up to the discussion on the kernel mailing list > > regarding the preference and future of the hard lockup detectors. It > > implements a flexible solution that addresses the community's need to > > select an appropriate detector at boot time. > > > > The core changes are: > > - The `perf` and `buddy` watchdog implementations are separated into > > distinct functions (e.g., `watchdog_perf_hardlockup_enable`). > > - Global function pointers are introduced (`watchdog_hardlockup_enable_ptr`) > > to serve as a single API for the entire feature. > > - A new `hardlockup_detector=` boot parameter is added to allow the > > user to select the desired detector at boot time. > > - The Kconfig options are simplified by removing the complex > > `HARDLOCKUP_DETECTOR_PREFER_BUDDY` and allowing both detectors to be > > built without mutual exclusion. > > - The weak stubs are updated to call the new function pointers, > > centralizing the watchdog logic. > > What is the impact on /proc/sys/kernel/nmi_watchdog ? Is that > enabling and disabling whatever the boot time choice was? 
I'm not sure > why this has to be a boot time option given the ability to configure > via /proc/sys/kernel/nmi_watchdog. The new hardlockup_detector boot parameter and the existing /proc/sys/kernel/nmi_watchdog file serve different purposes. The boot parameter selects the type of hard lockup detector (perf or buddy). This choice is made once at boot. /proc/sys/kernel/nmi_watchdog, on the other hand, is only a simple on/off switch for the currently selected detector. It does not change the detector's type. > > > Link: https://lore.kernel.org/all/20250915035355.10846-1-cuiyunhui@bytedance.com/ > > Link: https://lore.kernel.org/all/CAD=FV=WWUiCi6bZCs_gseFpDDWNkuJMoL6XCftEo6W7q6jRCkg@mail.gmail.com/ > > > > Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com> > > --- > > .../admin-guide/kernel-parameters.txt | 7 +++ > > include/linux/nmi.h | 6 +++ > > kernel/watchdog.c | 46 ++++++++++++++++++- > > kernel/watchdog_buddy.c | 7 +-- > > kernel/watchdog_perf.c | 10 ++-- > > lib/Kconfig.debug | 37 +++++++-------- > > 6 files changed, 85 insertions(+), 28 deletions(-) > > > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > > index 5a7a83c411e9..0af214ee566c 100644 > > --- a/Documentation/admin-guide/kernel-parameters.txt > > +++ b/Documentation/admin-guide/kernel-parameters.txt > > @@ -1828,6 +1828,13 @@ > > backtraces on all cpus. > > Format: 0 | 1 > > > > + hardlockup_detector= > > + [perf, buddy] Selects the hard lockup detector to use at > > + boot time. > > + Format: <string> > > + - "perf": Use the perf-based detector. > > + - "buddy": Use the buddy-based detector. 
> > + > > hash_pointers= > > [KNL,EARLY] > > By default, when pointers are printed to the console > > diff --git a/include/linux/nmi.h b/include/linux/nmi.h > > index cf3c6ab408aa..9298980ce572 100644 > > --- a/include/linux/nmi.h > > +++ b/include/linux/nmi.h > > @@ -100,6 +100,9 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); > > #endif > > > > #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) > > +void watchdog_perf_hardlockup_enable(unsigned int cpu); > > +void watchdog_perf_hardlockup_disable(unsigned int cpu); > > +extern int watchdog_perf_hardlockup_probe(void); > > extern void hardlockup_detector_perf_stop(void); > > extern void hardlockup_detector_perf_restart(void); > > extern void hardlockup_config_perf_event(const char *str); > > @@ -120,6 +123,9 @@ void watchdog_hardlockup_disable(unsigned int cpu); > > void lockup_detector_reconfigure(void); > > > > #ifdef CONFIG_HARDLOCKUP_DETECTOR_BUDDY > > +void watchdog_buddy_hardlockup_enable(unsigned int cpu); > > +void watchdog_buddy_hardlockup_disable(unsigned int cpu); > > +int watchdog_buddy_hardlockup_probe(void); > > void watchdog_buddy_check_hardlockup(int hrtimer_interrupts); > > #else > > static inline void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) {} > > diff --git a/kernel/watchdog.c b/kernel/watchdog.c > > index 80b56c002c7f..85451d24a77d 100644 > > --- a/kernel/watchdog.c > > +++ b/kernel/watchdog.c > > @@ -55,6 +55,37 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); > > > > #ifdef CONFIG_HARDLOCKUP_DETECTOR > > > > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF > > +/* The global function pointers */ > > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_enable; > > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_disable; > > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_perf_hardlockup_probe; > > +#elif defined(CONFIG_HARDLOCKUP_DETECTOR_BUDDY) > > +void 
(*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_enable; > > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_disable; > > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_buddy_hardlockup_probe; > > +#endif > > + > > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > > +static char *hardlockup_detector_type = "perf"; /* Default to perf */ > > +static int __init set_hardlockup_detector_type(char *str) > > +{ > > + if (!strncmp(str, "perf", 4)) { > > + watchdog_hardlockup_enable_ptr = watchdog_perf_hardlockup_enable; > > + watchdog_hardlockup_disable_ptr = watchdog_perf_hardlockup_disable; > > + watchdog_hardlockup_probe_ptr = watchdog_perf_hardlockup_probe; > > + } else if (!strncmp(str, "buddy", 5)) { > > + watchdog_hardlockup_enable_ptr = watchdog_buddy_hardlockup_enable; > > + watchdog_hardlockup_disable_ptr = watchdog_buddy_hardlockup_disable; > > + watchdog_hardlockup_probe_ptr = watchdog_buddy_hardlockup_probe; > > + } > > + return 1; > > +} > > + > > +__setup("hardlockup_detector=", set_hardlockup_detector_type); > > + > > +#endif > > + > > # ifdef CONFIG_SMP > > int __read_mostly sysctl_hardlockup_all_cpu_backtrace; > > # endif /* CONFIG_SMP */ > > @@ -262,9 +293,17 @@ static inline void watchdog_hardlockup_kick(void) { } > > * softlockup watchdog start and stop. The detector must select the > > * SOFTLOCKUP_DETECTOR Kconfig. > > */ > > -void __weak watchdog_hardlockup_enable(unsigned int cpu) { } > > +void __weak watchdog_hardlockup_enable(unsigned int cpu) > > +{ > > + if (watchdog_hardlockup_enable_ptr) > > + watchdog_hardlockup_enable_ptr(cpu); > > +} > > > > -void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > > +void __weak watchdog_hardlockup_disable(unsigned int cpu) > > +{ > > + if (watchdog_hardlockup_disable_ptr) > > + watchdog_hardlockup_disable_ptr(cpu); > > +} > > > > /* > > * Watchdog-detector specific API. 
> > @@ -275,6 +314,9 @@ void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > > */ > > int __weak __init watchdog_hardlockup_probe(void) > > { > > + if (watchdog_hardlockup_probe_ptr) > > + return watchdog_hardlockup_probe_ptr(); > > + > > return -ENODEV; > > } > > > > diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c > > index ee754d767c21..390d89bfcafa 100644 > > --- a/kernel/watchdog_buddy.c > > +++ b/kernel/watchdog_buddy.c > > @@ -19,15 +19,16 @@ static unsigned int watchdog_next_cpu(unsigned int cpu) > > return next_cpu; > > } > > > > -int __init watchdog_hardlockup_probe(void) > > +int __init watchdog_buddy_hardlockup_probe(void) > > { > > return 0; > > } > > > > -void watchdog_hardlockup_enable(unsigned int cpu) > > +void watchdog_buddy_hardlockup_enable(unsigned int cpu) > > { > > unsigned int next_cpu; > > > > + pr_info("ddddd %s\n", __func__); > > /* > > * The new CPU will be marked online before the hrtimer interrupt > > * gets a chance to run on it. If another CPU tests for a > > @@ -58,7 +59,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > > cpumask_set_cpu(cpu, &watchdog_cpus); > > } > > > > -void watchdog_hardlockup_disable(unsigned int cpu) > > +void watchdog_buddy_hardlockup_disable(unsigned int cpu) > > { > > unsigned int next_cpu = watchdog_next_cpu(cpu); > > > > diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c > > index 9c58f5b4381d..270110e58f20 100644 > > --- a/kernel/watchdog_perf.c > > +++ b/kernel/watchdog_perf.c > > @@ -153,10 +153,12 @@ static int hardlockup_detector_event_create(void) > > * watchdog_hardlockup_enable - Enable the local event > > * @cpu: The CPU to enable hard lockup on. 
> > */ > > -void watchdog_hardlockup_enable(unsigned int cpu) > > +void watchdog_perf_hardlockup_enable(unsigned int cpu) > > { > > WARN_ON_ONCE(cpu != smp_processor_id()); > > > > + pr_info("ddddd %s\n", __func__); > > + > > if (hardlockup_detector_event_create()) > > return; > > > > @@ -172,7 +174,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > > * watchdog_hardlockup_disable - Disable the local event > > * @cpu: The CPU to enable hard lockup on. > > */ > > -void watchdog_hardlockup_disable(unsigned int cpu) > > +void watchdog_perf_hardlockup_disable(unsigned int cpu) > > { > > struct perf_event *event = this_cpu_read(watchdog_ev); > > > > @@ -257,10 +259,12 @@ bool __weak __init arch_perf_nmi_is_available(void) > > /** > > * watchdog_hardlockup_probe - Probe whether NMI event is available at all > > */ > > -int __init watchdog_hardlockup_probe(void) > > +int __init watchdog_perf_hardlockup_probe(void) > > { > > int ret; > > > > + pr_info("ddddd %s\n", __func__); > > + > > if (!arch_perf_nmi_is_available()) > > return -ENODEV; > > > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > > index dc0e0c6ed075..443353fad1c1 100644 > > --- a/lib/Kconfig.debug > > +++ b/lib/Kconfig.debug > > @@ -1167,36 +1167,33 @@ config HARDLOCKUP_DETECTOR > > # > > # Note that arch-specific variants are always preferred. > > # > > -config HARDLOCKUP_DETECTOR_PREFER_BUDDY > > - bool "Prefer the buddy CPU hardlockup detector" > > - depends on HARDLOCKUP_DETECTOR > > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && HAVE_HARDLOCKUP_DETECTOR_BUDDY > > - depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > - help > > - Say Y here to prefer the buddy hardlockup detector over the perf one. > > - > > - With the buddy detector, each CPU uses its softlockup hrtimer > > - to check that the next CPU is processing hrtimer interrupts by > > - verifying that a counter is increasing. 
> > - > > - This hardlockup detector is useful on systems that don't have > > - an arch-specific hardlockup detector or if resources needed > > - for the hardlockup detector are better used for other things. > > - > > config HARDLOCKUP_DETECTOR_PERF > > - bool > > + bool "Enable perf-based hard lockup detector (preferred)" > > depends on HARDLOCKUP_DETECTOR > > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY > > + depends on HAVE_HARDLOCKUP_DETECTOR_PERF > > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > > + help > > + This detector uses a perf event on the CPU to detect when a CPU > > + has become non-maskable interrupt (NMI) stuck. This is the > > + preferred method on modern systems as it can detect lockups on > > + all CPUs at the same time. > > I'd say this option should be the default for kernel developers but > shouldn't be used by default to free the perf event and due to the > extra power overhead. > > Thanks, > Ian > > > config HARDLOCKUP_DETECTOR_BUDDY > > - bool > > + bool "Enable buddy-based hard lockup detector" > > depends on HARDLOCKUP_DETECTOR > > depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY > > - depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY > > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > > + help > > + This is an alternative lockup detector that uses a heartbeat > > + mechanism between CPUs to detect when one has stopped responding. > > + It is less precise than the perf-based detector and cannot detect > > + all-CPU lockups, but it does not require a perf counter. > > + > > +config CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > > + bool > > + depends on HARDLOCKUP_DETECTOR_PERF && HARDLOCKUP_DETECTOR_BUDDY > > > > config HARDLOCKUP_DETECTOR_ARCH > > bool > > -- > > 2.43.0 > > -- Jinchao
On Tue, Sep 16, 2025 at 6:47 PM Jinchao Wang <wangjinchao600@gmail.com> wrote: > > On Tue, Sep 16, 2025 at 05:03:48PM -0700, Ian Rogers wrote: > > On Tue, Sep 16, 2025 at 7:51 AM Jinchao Wang <wangjinchao600@gmail.com> wrote: > > > > > > Currently, the hard lockup detector is selected at compile time via > > > Kconfig, which requires a kernel rebuild to switch implementations. > > > This is inflexible, especially on systems where a perf event may not > > > be available or may be needed for other tasks. > > > > > > This commit refactors the hard lockup detector to replace a rigid > > > compile-time choice with a flexible build-time and boot-time solution. > > > The patch supports building the kernel with either detector > > > independently, or with both. When both are built, a new boot parameter > > > `hardlockup_detector="perf|buddy"` allows the selection at boot time. > > > This is a more robust and user-friendly design. > > > > > > This patch is a follow-up to the discussion on the kernel mailing list > > > regarding the preference and future of the hard lockup detectors. It > > > implements a flexible solution that addresses the community's need to > > > select an appropriate detector at boot time. > > > > > > The core changes are: > > > - The `perf` and `buddy` watchdog implementations are separated into > > > distinct functions (e.g., `watchdog_perf_hardlockup_enable`). > > > - Global function pointers are introduced (`watchdog_hardlockup_enable_ptr`) > > > to serve as a single API for the entire feature. > > > - A new `hardlockup_detector=` boot parameter is added to allow the > > > user to select the desired detector at boot time. > > > - The Kconfig options are simplified by removing the complex > > > `HARDLOCKUP_DETECTOR_PREFER_BUDDY` and allowing both detectors to be > > > built without mutual exclusion. > > > - The weak stubs are updated to call the new function pointers, > > > centralizing the watchdog logic. 
> > > > What is the impact on /proc/sys/kernel/nmi_watchdog ? Is that > > enabling and disabling whatever the boot time choice was? I'm not sure > > why this has to be a boot time option given the ability to configure > > via /proc/sys/kernel/nmi_watchdog. > The new hardlockup_detector boot parameter and the existing > /proc/sys/kernel/nmi_watchdog file serve different purposes. > > The boot parameter selects the type of hard lockup detector (perf or buddy). > This choice is made once at boot. > > /proc/sys/kernel/nmi_watchdog, on the other hand, is only a simple on/off > switch for the currently selected detector. It does not change the detector's > type. So the name "nmi_watchdog" for the buddy watchdog is wrong for fairly obvious naming reasons but also because we can't differentiate when a perf event has been taken or not - this impacts perf that is choosing not to group events in metrics because of it, reducing the metric's accuracy. We need an equivalent "buddy_watchdog" file to the "nmi_watchdog" file. If we have such a file then if I did "echo 1 > /proc/sys/kernel/nmi_watchdog" I'd expect the buddy watchdog to be disabled and the perf event one to be enabled. Similarly, if I did "echo 1 > /proc/sys/kernel/buddy_watchdog" then I would expect the perf event watchdog to be disabled and the buddy one enabled. If I did "echo 0 > /proc/sys/kernel/nmi_watchdog; echo 0 > /proc/sys/kernel/buddy_watchdog" then I'd expect neither to be enabled. I don't see why choosing the type of watchdog implementation at boot time is particularly desirable. It seems sensible to default normal people to using the buddy watchdog (more perf events, power...) and CONFIG_DEBUG_KERNEL type people to using the perf event one. As the "nmi_watchdog" file may be assumed to control the buddy watchdog, perhaps a compatibility option (where the "nmi_watchdog" file controls the buddy watchdog) is needed so that user code has time to migrate. 
Thanks, Ian > > > > > Link: https://lore.kernel.org/all/20250915035355.10846-1-cuiyunhui@bytedance.com/ > > > Link: https://lore.kernel.org/all/CAD=FV=WWUiCi6bZCs_gseFpDDWNkuJMoL6XCftEo6W7q6jRCkg@mail.gmail.com/ > > > > > > Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com> > > > --- > > > .../admin-guide/kernel-parameters.txt | 7 +++ > > > include/linux/nmi.h | 6 +++ > > > kernel/watchdog.c | 46 ++++++++++++++++++- > > > kernel/watchdog_buddy.c | 7 +-- > > > kernel/watchdog_perf.c | 10 ++-- > > > lib/Kconfig.debug | 37 +++++++-------- > > > 6 files changed, 85 insertions(+), 28 deletions(-) > > > > > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > > > index 5a7a83c411e9..0af214ee566c 100644 > > > --- a/Documentation/admin-guide/kernel-parameters.txt > > > +++ b/Documentation/admin-guide/kernel-parameters.txt > > > @@ -1828,6 +1828,13 @@ > > > backtraces on all cpus. > > > Format: 0 | 1 > > > > > > + hardlockup_detector= > > > + [perf, buddy] Selects the hard lockup detector to use at > > > + boot time. > > > + Format: <string> > > > + - "perf": Use the perf-based detector. > > > + - "buddy": Use the buddy-based detector. 
> > > + > > > hash_pointers= > > > [KNL,EARLY] > > > By default, when pointers are printed to the console > > > diff --git a/include/linux/nmi.h b/include/linux/nmi.h > > > index cf3c6ab408aa..9298980ce572 100644 > > > --- a/include/linux/nmi.h > > > +++ b/include/linux/nmi.h > > > @@ -100,6 +100,9 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); > > > #endif > > > > > > #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) > > > +void watchdog_perf_hardlockup_enable(unsigned int cpu); > > > +void watchdog_perf_hardlockup_disable(unsigned int cpu); > > > +extern int watchdog_perf_hardlockup_probe(void); > > > extern void hardlockup_detector_perf_stop(void); > > > extern void hardlockup_detector_perf_restart(void); > > > extern void hardlockup_config_perf_event(const char *str); > > > @@ -120,6 +123,9 @@ void watchdog_hardlockup_disable(unsigned int cpu); > > > void lockup_detector_reconfigure(void); > > > > > > #ifdef CONFIG_HARDLOCKUP_DETECTOR_BUDDY > > > +void watchdog_buddy_hardlockup_enable(unsigned int cpu); > > > +void watchdog_buddy_hardlockup_disable(unsigned int cpu); > > > +int watchdog_buddy_hardlockup_probe(void); > > > void watchdog_buddy_check_hardlockup(int hrtimer_interrupts); > > > #else > > > static inline void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) {} > > > diff --git a/kernel/watchdog.c b/kernel/watchdog.c > > > index 80b56c002c7f..85451d24a77d 100644 > > > --- a/kernel/watchdog.c > > > +++ b/kernel/watchdog.c > > > @@ -55,6 +55,37 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); > > > > > > #ifdef CONFIG_HARDLOCKUP_DETECTOR > > > > > > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF > > > +/* The global function pointers */ > > > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_enable; > > > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_perf_hardlockup_disable; > > > +int (*watchdog_hardlockup_probe_ptr)(void) = 
watchdog_perf_hardlockup_probe; > > > +#elif defined(CONFIG_HARDLOCKUP_DETECTOR_BUDDY) > > > +void (*watchdog_hardlockup_enable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_enable; > > > +void (*watchdog_hardlockup_disable_ptr)(unsigned int cpu) = watchdog_buddy_hardlockup_disable; > > > +int (*watchdog_hardlockup_probe_ptr)(void) = watchdog_buddy_hardlockup_probe; > > > +#endif > > > + > > > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > > > +static char *hardlockup_detector_type = "perf"; /* Default to perf */ > > > +static int __init set_hardlockup_detector_type(char *str) > > > +{ > > > + if (!strncmp(str, "perf", 4)) { > > > + watchdog_hardlockup_enable_ptr = watchdog_perf_hardlockup_enable; > > > + watchdog_hardlockup_disable_ptr = watchdog_perf_hardlockup_disable; > > > + watchdog_hardlockup_probe_ptr = watchdog_perf_hardlockup_probe; > > > + } else if (!strncmp(str, "buddy", 5)) { > > > + watchdog_hardlockup_enable_ptr = watchdog_buddy_hardlockup_enable; > > > + watchdog_hardlockup_disable_ptr = watchdog_buddy_hardlockup_disable; > > > + watchdog_hardlockup_probe_ptr = watchdog_buddy_hardlockup_probe; > > > + } > > > + return 1; > > > +} > > > + > > > +__setup("hardlockup_detector=", set_hardlockup_detector_type); > > > + > > > +#endif > > > + > > > # ifdef CONFIG_SMP > > > int __read_mostly sysctl_hardlockup_all_cpu_backtrace; > > > # endif /* CONFIG_SMP */ > > > @@ -262,9 +293,17 @@ static inline void watchdog_hardlockup_kick(void) { } > > > * softlockup watchdog start and stop. The detector must select the > > > * SOFTLOCKUP_DETECTOR Kconfig. 
> > > */ > > > -void __weak watchdog_hardlockup_enable(unsigned int cpu) { } > > > +void __weak watchdog_hardlockup_enable(unsigned int cpu) > > > +{ > > > + if (watchdog_hardlockup_enable_ptr) > > > + watchdog_hardlockup_enable_ptr(cpu); > > > +} > > > > > > -void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > > > +void __weak watchdog_hardlockup_disable(unsigned int cpu) > > > +{ > > > + if (watchdog_hardlockup_disable_ptr) > > > + watchdog_hardlockup_disable_ptr(cpu); > > > +} > > > > > > /* > > > * Watchdog-detector specific API. > > > @@ -275,6 +314,9 @@ void __weak watchdog_hardlockup_disable(unsigned int cpu) { } > > > */ > > > int __weak __init watchdog_hardlockup_probe(void) > > > { > > > + if (watchdog_hardlockup_probe_ptr) > > > + return watchdog_hardlockup_probe_ptr(); > > > + > > > return -ENODEV; > > > } > > > > > > diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c > > > index ee754d767c21..390d89bfcafa 100644 > > > --- a/kernel/watchdog_buddy.c > > > +++ b/kernel/watchdog_buddy.c > > > @@ -19,15 +19,16 @@ static unsigned int watchdog_next_cpu(unsigned int cpu) > > > return next_cpu; > > > } > > > > > > -int __init watchdog_hardlockup_probe(void) > > > +int __init watchdog_buddy_hardlockup_probe(void) > > > { > > > return 0; > > > } > > > > > > -void watchdog_hardlockup_enable(unsigned int cpu) > > > +void watchdog_buddy_hardlockup_enable(unsigned int cpu) > > > { > > > unsigned int next_cpu; > > > > > > + pr_info("ddddd %s\n", __func__); > > > /* > > > * The new CPU will be marked online before the hrtimer interrupt > > > * gets a chance to run on it. 
If another CPU tests for a > > > @@ -58,7 +59,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > > > cpumask_set_cpu(cpu, &watchdog_cpus); > > > } > > > > > > -void watchdog_hardlockup_disable(unsigned int cpu) > > > +void watchdog_buddy_hardlockup_disable(unsigned int cpu) > > > { > > > unsigned int next_cpu = watchdog_next_cpu(cpu); > > > > > > diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c > > > index 9c58f5b4381d..270110e58f20 100644 > > > --- a/kernel/watchdog_perf.c > > > +++ b/kernel/watchdog_perf.c > > > @@ -153,10 +153,12 @@ static int hardlockup_detector_event_create(void) > > > * watchdog_hardlockup_enable - Enable the local event > > > * @cpu: The CPU to enable hard lockup on. > > > */ > > > -void watchdog_hardlockup_enable(unsigned int cpu) > > > +void watchdog_perf_hardlockup_enable(unsigned int cpu) > > > { > > > WARN_ON_ONCE(cpu != smp_processor_id()); > > > > > > + pr_info("ddddd %s\n", __func__); > > > + > > > if (hardlockup_detector_event_create()) > > > return; > > > > > > @@ -172,7 +174,7 @@ void watchdog_hardlockup_enable(unsigned int cpu) > > > * watchdog_hardlockup_disable - Disable the local event > > > * @cpu: The CPU to enable hard lockup on. 
> > > */ > > > -void watchdog_hardlockup_disable(unsigned int cpu) > > > +void watchdog_perf_hardlockup_disable(unsigned int cpu) > > > { > > > struct perf_event *event = this_cpu_read(watchdog_ev); > > > > > > @@ -257,10 +259,12 @@ bool __weak __init arch_perf_nmi_is_available(void) > > > /** > > > * watchdog_hardlockup_probe - Probe whether NMI event is available at all > > > */ > > > -int __init watchdog_hardlockup_probe(void) > > > +int __init watchdog_perf_hardlockup_probe(void) > > > { > > > int ret; > > > > > > + pr_info("ddddd %s\n", __func__); > > > + > > > if (!arch_perf_nmi_is_available()) > > > return -ENODEV; > > > > > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > > > index dc0e0c6ed075..443353fad1c1 100644 > > > --- a/lib/Kconfig.debug > > > +++ b/lib/Kconfig.debug > > > @@ -1167,36 +1167,33 @@ config HARDLOCKUP_DETECTOR > > > # > > > # Note that arch-specific variants are always preferred. > > > # > > > -config HARDLOCKUP_DETECTOR_PREFER_BUDDY > > > - bool "Prefer the buddy CPU hardlockup detector" > > > - depends on HARDLOCKUP_DETECTOR > > > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && HAVE_HARDLOCKUP_DETECTOR_BUDDY > > > - depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > > - help > > > - Say Y here to prefer the buddy hardlockup detector over the perf one. > > > - > > > - With the buddy detector, each CPU uses its softlockup hrtimer > > > - to check that the next CPU is processing hrtimer interrupts by > > > - verifying that a counter is increasing. > > > - > > > - This hardlockup detector is useful on systems that don't have > > > - an arch-specific hardlockup detector or if resources needed > > > - for the hardlockup detector are better used for other things. 
> > > - > > > config HARDLOCKUP_DETECTOR_PERF > > > - bool > > > + bool "Enable perf-based hard lockup detector (preferred)" > > > depends on HARDLOCKUP_DETECTOR > > > - depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY > > > + depends on HAVE_HARDLOCKUP_DETECTOR_PERF > > > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > > > + help > > > + This detector uses a perf event on the CPU to detect when a CPU > > > + has become non-maskable interrupt (NMI) stuck. This is the > > > + preferred method on modern systems as it can detect lockups on > > > + all CPUs at the same time. > > > > I'd say this option should be the default for kernel developers but > > shouldn't be used by default to free the perf event and due to the > > extra power overhead. > > > > Thanks, > > Ian > > > > > config HARDLOCKUP_DETECTOR_BUDDY > > > - bool > > > + bool "Enable buddy-based hard lockup detector" > > > depends on HARDLOCKUP_DETECTOR > > > depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY > > > - depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY > > > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH > > > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER > > > + help > > > + This is an alternative lockup detector that uses a heartbeat > > > + mechanism between CPUs to detect when one has stopped responding. > > > + It is less precise than the perf-based detector and cannot detect > > > + all-CPU lockups, but it does not require a perf counter. > > > + > > > +config CONFIG_HARDLOCKUP_DETECTOR_MULTIPLE > > > + bool > > > + depends on HARDLOCKUP_DETECTOR_PERF && HARDLOCKUP_DETECTOR_BUDDY > > > > > > config HARDLOCKUP_DETECTOR_ARCH > > > bool > > > -- > > > 2.43.0 > > > > > -- > Jinchao
Hello, On Tue, Sep 16, 2025 at 10:13:12PM -0700, Ian Rogers wrote: > On Tue, Sep 16, 2025 at 6:47 PM Jinchao Wang <wangjinchao600@gmail.com> wrote: > > > > On Tue, Sep 16, 2025 at 05:03:48PM -0700, Ian Rogers wrote: > > > On Tue, Sep 16, 2025 at 7:51 AM Jinchao Wang <wangjinchao600@gmail.com> wrote: > > > > > > > > Currently, the hard lockup detector is selected at compile time via > > > > Kconfig, which requires a kernel rebuild to switch implementations. > > > > This is inflexible, especially on systems where a perf event may not > > > > be available or may be needed for other tasks. > > > > > > > > This commit refactors the hard lockup detector to replace a rigid > > > > compile-time choice with a flexible build-time and boot-time solution. > > > > The patch supports building the kernel with either detector > > > > independently, or with both. When both are built, a new boot parameter > > > > `hardlockup_detector="perf|buddy"` allows the selection at boot time. > > > > This is a more robust and user-friendly design. > > > > > > > > This patch is a follow-up to the discussion on the kernel mailing list > > > > regarding the preference and future of the hard lockup detectors. It > > > > implements a flexible solution that addresses the community's need to > > > > select an appropriate detector at boot time. > > > > > > > > The core changes are: > > > > - The `perf` and `buddy` watchdog implementations are separated into > > > > distinct functions (e.g., `watchdog_perf_hardlockup_enable`). > > > > - Global function pointers are introduced (`watchdog_hardlockup_enable_ptr`) > > > > to serve as a single API for the entire feature. > > > > - A new `hardlockup_detector=` boot parameter is added to allow the > > > > user to select the desired detector at boot time. > > > > - The Kconfig options are simplified by removing the complex > > > > `HARDLOCKUP_DETECTOR_PREFER_BUDDY` and allowing both detectors to be > > > > built without mutual exclusion. 
> > > > - The weak stubs are updated to call the new function pointers, > > > > centralizing the watchdog logic. > > > > > > What is the impact on /proc/sys/kernel/nmi_watchdog ? Is that > > > enabling and disabling whatever the boot time choice was? I'm not sure > > > why this has to be a boot time option given the ability to configure > > > via /proc/sys/kernel/nmi_watchdog. > > The new hardlockup_detector boot parameter and the existing > > /proc/sys/kernel/nmi_watchdog file serve different purposes. > > > > The boot parameter selects the type of hard lockup detector (perf or buddy). > > This choice is made once at boot. > > > > /proc/sys/kernel/nmi_watchdog, on the other hand, is only a simple on/off > > switch for the currently selected detector. It does not change the detector's > > type. > > So the name "nmi_watchdog" for the buddy watchdog is wrong for fairly > obvious naming reasons but also because we can't differentiate when a > perf event has been taken or not - this impacts perf that is choosing > not to group events in metrics because of it, reducing the metric's > accuracy. We need an equivalent "buddy_watchdog" file to the > "nmi_watchdog" file. If we have such a file then if I did "echo 1 > > /proc/sys/kernel/nmi_watchdog" I'd expect the buddy watchdog to be > disabled and the perf event one to be enabled. Similarly, if I did > "echo 1 > /proc/sys/kernel/buddy_watchdog" then I would expect the > perf event watchdog to be disabled and the buddy one enabled. If I did > "echo 0 > /proc/sys/kernel/nmi_watchdog; echo 0 > > /proc/sys/kernel/buddy_watchdog" then I'd expect neither to be > enabled. I don't see why choosing the type of watchdog implementation > at boot time is particularly desirable. It seems sensible to default > normal people to using the buddy watchdog (more perf events, power...) > and CONFIG_DEBUG_KERNEL type people to using the perf event one. 
As > the "nmi_watchdog" file may be assumed to control the buddy watchdog, > perhaps a compatibility option (where the "nmi_watchdog" file controls > the buddy watchdog) is needed so that user code has time to migrate. Sounds good to me. For perf tools, it'd be great if we can have a run-time check of which watchdog is selected. Thanks, Namhyung
On Tue, Sep 16, 2025 at 10:35:46PM -0700, Namhyung Kim wrote: > Hello, > > On Tue, Sep 16, 2025 at 10:13:12PM -0700, Ian Rogers wrote: > > On Tue, Sep 16, 2025 at 6:47 PM Jinchao Wang <wangjinchao600@gmail.com> wrote: > > > > > > On Tue, Sep 16, 2025 at 05:03:48PM -0700, Ian Rogers wrote: > > > > On Tue, Sep 16, 2025 at 7:51 AM Jinchao Wang <wangjinchao600@gmail.com> wrote: > > > > > > > > > > Currently, the hard lockup detector is selected at compile time via > > > > > Kconfig, which requires a kernel rebuild to switch implementations. > > > > > This is inflexible, especially on systems where a perf event may not > > > > > be available or may be needed for other tasks. > > > > > > > > > > This commit refactors the hard lockup detector to replace a rigid > > > > > compile-time choice with a flexible build-time and boot-time solution. > > > > > The patch supports building the kernel with either detector > > > > > independently, or with both. When both are built, a new boot parameter > > > > > `hardlockup_detector="perf|buddy"` allows the selection at boot time. > > > > > This is a more robust and user-friendly design. > > > > > > > > > > This patch is a follow-up to the discussion on the kernel mailing list > > > > > regarding the preference and future of the hard lockup detectors. It > > > > > implements a flexible solution that addresses the community's need to > > > > > select an appropriate detector at boot time. > > > > > > > > > > The core changes are: > > > > > - The `perf` and `buddy` watchdog implementations are separated into > > > > > distinct functions (e.g., `watchdog_perf_hardlockup_enable`). > > > > > - Global function pointers are introduced (`watchdog_hardlockup_enable_ptr`) > > > > > to serve as a single API for the entire feature. > > > > > - A new `hardlockup_detector=` boot parameter is added to allow the > > > > > user to select the desired detector at boot time. 
> > > > > - The Kconfig options are simplified by removing the complex > > > > > `HARDLOCKUP_DETECTOR_PREFER_BUDDY` and allowing both detectors to be > > > > > built without mutual exclusion. > > > > > - The weak stubs are updated to call the new function pointers, > > > > > centralizing the watchdog logic. > > > > > > > > What is the impact on /proc/sys/kernel/nmi_watchdog ? Is that > > > > enabling and disabling whatever the boot time choice was? I'm not sure > > > > why this has to be a boot time option given the ability to configure > > > > via /proc/sys/kernel/nmi_watchdog. > > > The new hardlockup_detector boot parameter and the existing > > > /proc/sys/kernel/nmi_watchdog file serve different purposes. > > > > > > The boot parameter selects the type of hard lockup detector (perf or buddy). > > > This choice is made once at boot. > > > > > > /proc/sys/kernel/nmi_watchdog, on the other hand, is only a simple on/off > > > switch for the currently selected detector. It does not change the detector's > > > type. > > > > So the name "nmi_watchdog" for the buddy watchdog is wrong for fairly > > obvious naming reasons but also because we can't differentiate when a > > perf event has been taken or not - this impacts perf that is choosing > > not to group events in metrics because of it, reducing the metric's > > accuracy. We need an equivalent "buddy_watchdog" file to the > > "nmi_watchdog" file. If we have such a file then if I did "echo 1 > > > /proc/sys/kernel/nmi_watchdog" I'd expect the buddy watchdog to be > > disabled and the perf event one to be enabled. Similarly, if I did > > "echo 1 > /proc/sys/kernel/buddy_watchdog" then I would expect the > > perf event watchdog to be disabled and the buddy one enabled. If I did > > "echo 0 > /proc/sys/kernel/nmi_watchdog; echo 0 > > > /proc/sys/kernel/buddy_watchdog" then I'd expect neither to be > > enabled. I don't see why choosing the type of watchdog implementation > > at boot time is particularly desirable. 
It seems sensible to default > > normal people to using the buddy watchdog (more perf events, power...) > > and CONFIG_DEBUG_KERNEL type people to using the perf event one. As > > the "nmi_watchdog" file may be assumed to control the buddy watchdog, > > perhaps a compatibility option (where the "nmi_watchdog" file controls > > the buddy watchdog) is needed so that user code has time to migrate. > > Sounds good to me. For perf tools, it'd be great if we can have a run- > time check which watchdog is selected. Considering backward compatibility, I prefer to keep /proc/sys/kernel/nmi_watchdog and introduce a new file called /proc/sys/kernel/hardlockup_detector_type, which only shows the selected detector type (the default string, or the value given by the boot parameter). The global string pointer hardlockup_detector_type was already introduced in the patch, so exposing it in a file is straightforward. > > Thanks, > Namhyung > -- Jinchao
© 2016 - 2025 Red Hat, Inc.