[PATCH 3/3] watchdog: add lockup_sys_info sysctl to dump sys info on system lockup

Feng Tang posted 3 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH 3/3] watchdog: add lockup_sys_info sysctl to dump sys info on system lockup
Posted by Feng Tang 1 month, 1 week ago
When a soft/hard lockup happens, developers may need different kinds of
system information (call-stacks, memory info, locks, etc.) to help with
debugging.

Add a 'lockup_sys_info' sysctl knob that takes a human-readable string like
"tasks,mem,timers,locks,ftrace,...". When a system lockup happens, all of
the requested information is dumped (refer to kernel/sys_info.c for more
details).

Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
---
 Documentation/admin-guide/sysctl/kernel.rst |  5 +++++
 kernel/watchdog.c                           | 21 ++++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 45b4408dad31..4e39e661d5ab 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -582,6 +582,11 @@ if leaking kernel pointer values to unprivileged users is a concern.
 When ``kptr_restrict`` is set to 2, kernel pointers printed using
 %pK will be replaced with 0s regardless of privileges.
 
+lockup_sys_info
+===============
+A comma-separated list of extra system information to be dumped when a
+soft/hard lockup is detected, for example, "tasks,mem,timers,locks,...".
+Refer to the 'panic_sys_info' section below for more details.
 
 modprobe
 ========
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 659f5844393c..18d8f2a32318 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -25,6 +25,7 @@
 #include <linux/stop_machine.h>
 #include <linux/sysctl.h>
 #include <linux/tick.h>
+#include <linux/sys_info.h>
 
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
@@ -53,6 +54,13 @@ static int __read_mostly watchdog_hardlockup_available;
 struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
+/*
+ * A bitmask to control what kinds of system info are printed when a
+ * system lockup is detected. It could be task, memory, lock info, etc.
+ * Refer to include/linux/sys_info.h for the detailed bit definitions.
+ */
+static unsigned long lockup_si_mask;
+
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 
 # ifdef CONFIG_SMP
@@ -240,6 +248,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
 				clear_bit_unlock(0, &hard_lockup_nmi_warn);
 		}
 
+		sys_info(lockup_si_mask);
 		if (hardlockup_panic)
 			nmi_panic(regs, "Hard LOCKUP");
 
@@ -746,9 +755,11 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	unsigned long touch_ts, period_ts, now;
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
-	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
+	int softlockup_all_cpu_backtrace;
 	unsigned long flags;
 
+	softlockup_all_cpu_backtrace = (lockup_si_mask & SYS_INFO_ALL_BT) ?
+					1 : sysctl_softlockup_all_cpu_backtrace;
 	if (!watchdog_enabled)
 		return HRTIMER_NORESTART;
 
@@ -846,6 +857,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		}
 
 		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+		sys_info(lockup_si_mask & ~SYS_INFO_ALL_BT);
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
 	}
@@ -1178,6 +1190,13 @@ static const struct ctl_table watchdog_sysctls[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_watchdog_cpumask,
 	},
+	{
+		.procname	= "lockup_sys_info",
+		.data		= &lockup_si_mask,
+		.maxlen         = sizeof(lockup_si_mask),
+		.mode		= 0644,
+		.proc_handler	= sysctl_sys_info_handler,
+	},
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
 	{
 		.procname       = "soft_watchdog",
-- 
2.43.5
Re: [PATCH 3/3] watchdog: add lockup_sys_info sysctl to dump sys info on system lockup
Posted by Petr Mladek 1 month, 1 week ago
On Thu 2025-11-06 10:30:32, Feng Tang wrote:
> When soft/hard lockup happens, developers may need different kinds of
> system information (call-stacks, memory info, locks, etc.) to help debugging.
> 
> Add 'lockup_sys_info' sysctl knob to take human readable string like
> "tasks,mem,timers,locks,ftrace,...", and when system lockup happens, all
> requested information will be dumped. (refer kernel/sys_info.c for more
> details).
> 
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -53,6 +54,13 @@ static int __read_mostly watchdog_hardlockup_available;
>  struct cpumask watchdog_cpumask __read_mostly;
>  unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
>  
> +/*
> + * A bitmask to control what kinds of system info to be printed when
> + * system lockup is detected, it could be task, memory, lock etc. Refer
> + * include/linux/sys_info.h for detailed bit definition.
> + */
> +static unsigned long lockup_si_mask;
> +
>  #ifdef CONFIG_HARDLOCKUP_DETECTOR
>  
>  # ifdef CONFIG_SMP
> @@ -240,6 +248,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
>  				clear_bit_unlock(0, &hard_lockup_nmi_warn);
>  		}

The code right above has already printed backtraces from all CPUs when
sysctl_hardlockup_all_cpu_backtrace is set.

> +		sys_info(lockup_si_mask);

And sys_info() could print them again when the SYS_INFO_ALL_BT
bit is set. The hard lockup detector should use the same
trick as the softlockup detector in watchdog_timer_fn().

>  		if (hardlockup_panic)
>  			nmi_panic(regs, "Hard LOCKUP");
>  
> @@ -746,9 +755,11 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  	unsigned long touch_ts, period_ts, now;
>  	struct pt_regs *regs = get_irq_regs();
>  	int duration;
> -	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
> +	int softlockup_all_cpu_backtrace;
>  	unsigned long flags;
>  
> +	softlockup_all_cpu_backtrace = (lockup_si_mask & SYS_INFO_ALL_BT) ?
> +					1 : sysctl_softlockup_all_cpu_backtrace;
>  	if (!watchdog_enabled)
>  		return HRTIMER_NORESTART;
>  
> @@ -846,6 +857,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  		}
>  
>  		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
> +		sys_info(lockup_si_mask & ~SYS_INFO_ALL_BT);
>  		if (softlockup_panic)
>  			panic("softlockup: hung tasks");
>  	}
> @@ -1178,6 +1190,13 @@ static const struct ctl_table watchdog_sysctls[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_watchdog_cpumask,
>  	},
> +	{
> +		.procname	= "lockup_sys_info",
> +		.data		= &lockup_si_mask,
> +		.maxlen         = sizeof(lockup_si_mask),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_sys_info_handler,
> +	},

There already exist:

	+ hardlockup_all_cpu_backtrace
	+ hardlockup_panic
	+ softlockup_all_cpu_backtrace
	+ softlockup_panic

IMHO, it would make sense to introduce separate:

	+ hardlockup_sys_info
	+ softlockup_sys_info


Best Regards,
Petr
Re: [PATCH 3/3] watchdog: add lockup_sys_info sysctl to dump sys info on system lockup
Posted by Feng Tang 1 month, 1 week ago
On Tue, Nov 11, 2025 at 02:26:05PM +0100, Petr Mladek wrote:
> On Thu 2025-11-06 10:30:32, Feng Tang wrote:
> > When soft/hard lockup happens, developers may need different kinds of
> > system information (call-stacks, memory info, locks, etc.) to help debugging.
> > 
> > Add 'lockup_sys_info' sysctl knob to take human readable string like
> > "tasks,mem,timers,locks,ftrace,...", and when system lockup happens, all
> > requested information will be dumped. (refer kernel/sys_info.c for more
> > details).
> > 
> > --- a/kernel/watchdog.c
> > +++ b/kernel/watchdog.c
> > @@ -53,6 +54,13 @@ static int __read_mostly watchdog_hardlockup_available;
> >  struct cpumask watchdog_cpumask __read_mostly;
> >  unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
> >  
> > +/*
> > + * A bitmask to control what kinds of system info to be printed when
> > + * system lockup is detected, it could be task, memory, lock etc. Refer
> > + * include/linux/sys_info.h for detailed bit definition.
> > + */
> > +static unsigned long lockup_si_mask;
> > +
> >  #ifdef CONFIG_HARDLOCKUP_DETECTOR
> >  
> >  # ifdef CONFIG_SMP
> > @@ -240,6 +248,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
> >  				clear_bit_unlock(0, &hard_lockup_nmi_warn);
> >  		}
> 
> The code right above printed backtaces from all CPUs when
> sysctl_hardlockup_all_cpu_backtrace.
> 
> > +		sys_info(lockup_si_mask);
> 
> And sys_info() could print it again when SYS_INFO_ALL_BT
> bit is set. The hard lockup detector should use the same
> trick as the softlockup detector in watchdog_timer_fn().

Yes, I missed that. Thanks for catching it!

> >  		if (hardlockup_panic)
> >  			nmi_panic(regs, "Hard LOCKUP");
> >  
> > @@ -746,9 +755,11 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
> >  	unsigned long touch_ts, period_ts, now;
> >  	struct pt_regs *regs = get_irq_regs();
> >  	int duration;
> > -	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
> > +	int softlockup_all_cpu_backtrace;
> >  	unsigned long flags;
> >  
> > +	softlockup_all_cpu_backtrace = (lockup_si_mask & SYS_INFO_ALL_BT) ?
> > +					1 : sysctl_softlockup_all_cpu_backtrace;
> >  	if (!watchdog_enabled)
> >  		return HRTIMER_NORESTART;
> >  
> > @@ -846,6 +857,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
> >  		}
> >  
> >  		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
> > +		sys_info(lockup_si_mask & ~SYS_INFO_ALL_BT);
> >  		if (softlockup_panic)
> >  			panic("softlockup: hung tasks");
> >  	}
> > @@ -1178,6 +1190,13 @@ static const struct ctl_table watchdog_sysctls[] = {
> >  		.mode		= 0644,
> >  		.proc_handler	= proc_watchdog_cpumask,
> >  	},
> > +	{
> > +		.procname	= "lockup_sys_info",
> > +		.data		= &lockup_si_mask,
> > +		.maxlen         = sizeof(lockup_si_mask),
> > +		.mode		= 0644,
> > +		.proc_handler	= sysctl_sys_info_handler,
> > +	},
> 
> There already exists:
> 
> 	+ hardlockup_all_cpu_backtrace
> 	+ hardlockup_panic
> 	+ softlockup_all_cpu_backtrace
> 	+ softlockup_panic
> 
> IMHO, it would make sense to introduce separate:
> 
> 	+ hardlockup_sys_info
> 	+ softlockup_sys_info

Makes sense to me, will do.

Thanks,
Feng