[PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung

Feng Tang posted 4 patches 1 month ago
[PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Feng Tang 1 month ago
When task-hung happens, developers may need different kinds of system
information (call-stacks, memory info, locks, etc.) to help debugging.

Add a 'hung_task_sys_info' sysctl knob that takes a human-readable string
like "tasks,mem,timers,locks,ftrace,...", so that when a task hang is
detected, all requested information is dumped (refer to kernel/sys_info.c
for more details).

Meanwhile, the newly introduced sys_info() call is used to unify some
existing info-dumping knobs.

Suggested-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
---
 Documentation/admin-guide/sysctl/kernel.rst |  5 ++
 kernel/hung_task.c                          | 62 +++++++++++++--------
 2 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index a397eeccaea7..45b4408dad31 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -422,6 +422,11 @@ the system boot.
 
 This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
 
+hung_task_sys_info
+==================
+A comma separated list of extra system information to be dumped when
+hung task is detected, for example, "tasks,mem,timers,locks,...".
+Refer 'panic_sys_info' section below for more details.
 
 hung_task_timeout_secs
 ======================
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 5ac0e66a1361..5b3a7785d3a2 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -24,6 +24,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/hung_task.h>
 #include <linux/rwsem.h>
+#include <linux/sys_info.h>
 
 #include <trace/events/sched.h>
 
@@ -59,12 +60,17 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
 static int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
-static bool hung_task_show_lock;
 static bool hung_task_call_panic;
-static bool hung_task_show_all_bt;
 
 static struct task_struct *watchdog_task;
 
+/*
+ * A bitmask to control what kinds of system info to be printed when
+ * a hung task is detected, it could be task, memory, lock etc. Refer
+ * include/linux/sys_info.h for detailed bit definition.
+ */
+static unsigned long hung_task_si_mask;
+
 #ifdef CONFIG_SMP
 /*
  * Should we dump all CPUs backtraces in a hung task event?
@@ -217,11 +223,8 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
 }
 #endif
 
-static void check_hung_task(struct task_struct *t, unsigned long timeout,
-		unsigned long prev_detect_count)
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
-	unsigned long total_hung_task;
-
 	if (!task_is_hung(t, timeout))
 		return;
 
@@ -231,20 +234,13 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
 	 */
 	sysctl_hung_task_detect_count++;
 
-	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
 	trace_sched_process_hang(t);
 
-	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
-		console_verbose();
-		hung_task_show_lock = true;
-		hung_task_call_panic = true;
-	}
-
 	/*
 	 * Ok, the task did not get scheduled for more than 2 minutes,
 	 * complain:
 	 */
-	if (sysctl_hung_task_warnings || hung_task_call_panic) {
+	if (sysctl_hung_task_warnings) {
 		if (sysctl_hung_task_warnings > 0)
 			sysctl_hung_task_warnings--;
 		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
@@ -259,10 +255,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
 			" disables this message.\n");
 		sched_show_task(t);
 		debug_show_blocker(t, timeout);
-		hung_task_show_lock = true;
 
-		if (sysctl_hung_task_all_cpu_backtrace)
-			hung_task_show_all_bt = true;
 		if (!sysctl_hung_task_warnings)
 			pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
 	}
@@ -302,8 +295,11 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 {
 	int max_count = sysctl_hung_task_check_count;
 	unsigned long last_break = jiffies;
+	unsigned long total_hung_task;
 	struct task_struct *g, *t;
 	unsigned long prev_detect_count = sysctl_hung_task_detect_count;
+	int need_warning = sysctl_hung_task_warnings;
+	unsigned long si_mask = hung_task_si_mask;
 
 	/*
 	 * If the system crashed already then all bets are off,
@@ -312,7 +308,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 	if (test_taint(TAINT_DIE) || did_panic)
 		return;
 
-	hung_task_show_lock = false;
+
 	rcu_read_lock();
 	for_each_process_thread(g, t) {
 
@@ -324,18 +320,29 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 			last_break = jiffies;
 		}
 
-		check_hung_task(t, timeout, prev_detect_count);
+		check_hung_task(t, timeout);
 	}
  unlock:
 	rcu_read_unlock();
-	if (hung_task_show_lock)
-		debug_show_all_locks();
 
-	if (hung_task_show_all_bt) {
-		hung_task_show_all_bt = false;
-		trigger_all_cpu_backtrace();
+	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
+	if (!total_hung_task)
+		return;
+
+	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
+		console_verbose();
+		hung_task_call_panic = true;
+	}
+
+	if (need_warning || hung_task_call_panic) {
+		si_mask |= SYS_INFO_LOCKS;
+
+		if (sysctl_hung_task_all_cpu_backtrace)
+			si_mask |= SYS_INFO_ALL_BT;
 	}
 
+	sys_info(si_mask);
+
 	if (hung_task_call_panic)
 		panic("hung_task: blocked tasks");
 }
@@ -434,6 +441,13 @@ static const struct ctl_table hung_task_sysctls[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+	{
+		.procname	= "hung_task_sys_info",
+		.data		= &hung_task_si_mask,
+		.maxlen         = sizeof(hung_task_si_mask),
+		.mode		= 0644,
+		.proc_handler	= sysctl_sys_info_handler,
+	},
 };
 
 static void __init hung_task_sysctl_init(void)
-- 
2.43.5
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Lance Yang 1 month ago

On 2025/11/13 19:10, Feng Tang wrote:
> When task-hung happens, developers may need different kinds of system
> information (call-stacks, memory info, locks, etc.) to help debugging.
> 
> Add 'hung_task_sys_info' sysctl knob to take human readable string like
> "tasks,mem,timers,locks,ftrace,...", and when task-hung happens, all
> requested information will be dumped. (refer kernel/sys_info.c for more
> details).
> 
> Meanwhile, the newly introduced sys_info() call is used to unify some
> existing info-dumping knobs.
> 
> Suggested-by: Petr Mladek <pmladek@suse.com>
> Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
> ---
>   Documentation/admin-guide/sysctl/kernel.rst |  5 ++
>   kernel/hung_task.c                          | 62 +++++++++++++--------
>   2 files changed, 43 insertions(+), 24 deletions(-)
> 
> diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
> index a397eeccaea7..45b4408dad31 100644
> --- a/Documentation/admin-guide/sysctl/kernel.rst
> +++ b/Documentation/admin-guide/sysctl/kernel.rst

[...]

> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 5ac0e66a1361..5b3a7785d3a2 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -24,6 +24,7 @@
>   #include <linux/sched/sysctl.h>
>   #include <linux/hung_task.h>
>   #include <linux/rwsem.h>
> +#include <linux/sys_info.h>
>   
>   #include <trace/events/sched.h>
>   
> @@ -59,12 +60,17 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
>   static int __read_mostly sysctl_hung_task_warnings = 10;
>   
>   static int __read_mostly did_panic;
> -static bool hung_task_show_lock;
>   static bool hung_task_call_panic;
> -static bool hung_task_show_all_bt;
>   
>   static struct task_struct *watchdog_task;
>   
> +/*
> + * A bitmask to control what kinds of system info to be printed when
> + * a hung task is detected, it could be task, memory, lock etc. Refer
> + * include/linux/sys_info.h for detailed bit definition.
> + */
> +static unsigned long hung_task_si_mask;
> +
>   #ifdef CONFIG_SMP
>   /*
>    * Should we dump all CPUs backtraces in a hung task event?
> @@ -217,11 +223,8 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
>   }
>   #endif
>   
> -static void check_hung_task(struct task_struct *t, unsigned long timeout,
> -		unsigned long prev_detect_count)
> +static void check_hung_task(struct task_struct *t, unsigned long timeout)
>   {
> -	unsigned long total_hung_task;
> -
>   	if (!task_is_hung(t, timeout))
>   		return;
>   
> @@ -231,20 +234,13 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
>   	 */
>   	sysctl_hung_task_detect_count++;
>   
> -	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
>   	trace_sched_process_hang(t);
>   
> -	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
> -		console_verbose();
> -		hung_task_show_lock = true;
> -		hung_task_call_panic = true;
> -	}
> -
>   	/*
>   	 * Ok, the task did not get scheduled for more than 2 minutes,
>   	 * complain:
>   	 */
> -	if (sysctl_hung_task_warnings || hung_task_call_panic) {
> +	if (sysctl_hung_task_warnings) {

It seems like the behavior changes when sysctl_hung_task_warnings is
0 but a panic is about to be triggered ...

Looking at the history:

1) Commit ("hung_task: ignore hung_task_warnings when hung_task_panic
is enabled")[1] ensured that hung task information is always dumped
when a panic is configured, even if the warning counter is exhausted.

2) Later, commit ("hung_task: panic when there are more than N hung
tasks at the same time")[2] refined the logic to trigger a panic based
on the number of hung tasks found in a single scan.

To stay consistent with the established behavior, I think we should
continue to dump the information for hung tasks as long as
sysctl_hung_task_panic is enabled :)

[1] https://lore.kernel.org/all/20240613033159.3446265-1-leonylgao@gmail.com
[2] https://lore.kernel.org/all/20251015063615.2632-1-lirongqing@baidu.com
[...]

Cheers,
Lance
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Feng Tang 1 month ago
On Sun, Nov 16, 2025 at 03:58:32PM +0800, Lance Yang wrote:
> 
> 
> On 2025/11/13 19:10, Feng Tang wrote:
> > When task-hung happens, developers may need different kinds of system
> > information (call-stacks, memory info, locks, etc.) to help debugging.
> > 
> > Add 'hung_task_sys_info' sysctl knob to take human readable string like
> > "tasks,mem,timers,locks,ftrace,...", and when task-hung happens, all
> > requested information will be dumped. (refer kernel/sys_info.c for more
> > details).
> > 
> > Meanwhile, the newly introduced sys_info() call is used to unify some
> > existing info-dumping knobs.
> > 
> > Suggested-by: Petr Mladek <pmladek@suse.com>
> > Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
> > ---
> >   Documentation/admin-guide/sysctl/kernel.rst |  5 ++
> >   kernel/hung_task.c                          | 62 +++++++++++++--------
> >   2 files changed, 43 insertions(+), 24 deletions(-)
> >   	 * Ok, the task did not get scheduled for more than 2 minutes,
> >   	 * complain:
> >   	 */
> > -	if (sysctl_hung_task_warnings || hung_task_call_panic) {
> > +	if (sysctl_hung_task_warnings) {
> 
> It seems like the behavior changes when sysctl_hung_task_warnings is
> 0 but a panic is about to be triggered ...
> 
> Looking at the history:
> 
> 1) Commit ("hung_task: ignore hung_task_warnings when hung_task_panic
> is enabled")[1] ensured that hung task information is always dumped
> when a panic is configured, even if the warning counter is exhausted.
> 
> 2) Later, commit ("hung_task: panic when there are more than N hung
> tasks at the same time")[2] refined the logic to trigger a panic based
> on the number of hung tasks found in a single scan.
> 
> To stay consistent with the established behavior, I think we should
> continue to dump the information for hung tasks as long as
> sysctl_hung_task_panic is enabled :)
> 
> [1] https://lore.kernel.org/all/20240613033159.3446265-1-leonylgao@gmail.com
> [2] https://lore.kernel.org/all/20251015063615.2632-1-lirongqing@baidu.com
> [...]

Aha, Petr asked a similar question during his review. Thanks for the catch!

How about the following fixup patch to restore that part of the logic?

Thanks,
Feng

---
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 5b3a7785d3a2..d2254c91450b 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -223,8 +223,11 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
 }
 #endif
 
-static void check_hung_task(struct task_struct *t, unsigned long timeout)
+static void check_hung_task(struct task_struct *t, unsigned long timeout,
+		unsigned long prev_detect_count)
 {
+	unsigned long total_hung_task;
+
 	if (!task_is_hung(t, timeout))
 		return;
 
@@ -234,13 +237,19 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 	 */
 	sysctl_hung_task_detect_count++;
 
+	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
 	trace_sched_process_hang(t);
 
+	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
+		console_verbose();
+		hung_task_call_panic = true;
+	}
+
 	/*
 	 * Ok, the task did not get scheduled for more than 2 minutes,
 	 * complain:
 	 */
-	if (sysctl_hung_task_warnings) {
+	if (sysctl_hung_task_warnings || hung_task_call_panic) {
 		if (sysctl_hung_task_warnings > 0)
 			sysctl_hung_task_warnings--;
 		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
@@ -295,7 +304,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 {
 	int max_count = sysctl_hung_task_check_count;
 	unsigned long last_break = jiffies;
-	unsigned long total_hung_task;
 	struct task_struct *g, *t;
 	unsigned long prev_detect_count = sysctl_hung_task_detect_count;
 	int need_warning = sysctl_hung_task_warnings;
@@ -320,20 +328,14 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 			last_break = jiffies;
 		}
 
-		check_hung_task(t, timeout);
+		check_hung_task(t, timeout, prev_detect_count);
 	}
  unlock:
 	rcu_read_unlock();
 
-	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
-	if (!total_hung_task)
+	if (!(sysctl_hung_task_detect_count - prev_detect_count))
 		return;
 
-	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
-		console_verbose();
-		hung_task_call_panic = true;
-	}
-
 	if (need_warning || hung_task_call_panic) {
 		si_mask |= SYS_INFO_LOCKS;
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Lance Yang 1 month ago

On 2025/11/16 17:11, Feng Tang wrote:
> On Sun, Nov 16, 2025 at 03:58:32PM +0800, Lance Yang wrote:
>>
>>
>> On 2025/11/13 19:10, Feng Tang wrote:
>>> When task-hung happens, developers may need different kinds of system
>>> information (call-stacks, memory info, locks, etc.) to help debugging.
>>>
>>> Add 'hung_task_sys_info' sysctl knob to take human readable string like
>>> "tasks,mem,timers,locks,ftrace,...", and when task-hung happens, all
>>> requested information will be dumped. (refer kernel/sys_info.c for more
>>> details).
>>>
>>> Meanwhile, the newly introduced sys_info() call is used to unify some
>>> existing info-dumping knobs.
>>>
>>> Suggested-by: Petr Mladek <pmladek@suse.com>
>>> Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
>>> ---
>>>    Documentation/admin-guide/sysctl/kernel.rst |  5 ++
>>>    kernel/hung_task.c                          | 62 +++++++++++++--------
>>>    2 files changed, 43 insertions(+), 24 deletions(-)
>>>    	 * Ok, the task did not get scheduled for more than 2 minutes,
>>>    	 * complain:
>>>    	 */
>>> -	if (sysctl_hung_task_warnings || hung_task_call_panic) {
>>> +	if (sysctl_hung_task_warnings) {
>>
>> It seems like the behavior changes when sysctl_hung_task_warnings is
>> 0 but a panic is about to be triggered ...
>>
>> Looking at the history:
>>
>> 1) Commit ("hung_task: ignore hung_task_warnings when hung_task_panic
>> is enabled")[1] ensured that hung task information is always dumped
>> when a panic is configured, even if the warning counter is exhausted.
>>
>> 2) Later, commit ("hung_task: panic when there are more than N hung
>> tasks at the same time")[2] refined the logic to trigger a panic based
>> on the number of hung tasks found in a single scan.
>>
>> To stay consistent with the established behavior, I think we should
>> continue to dump the information for hung tasks as long as
>> sysctl_hung_task_panic is enabled :)
>>
>> [1] https://lore.kernel.org/all/20240613033159.3446265-1-leonylgao@gmail.com
>> [2] https://lore.kernel.org/all/20251015063615.2632-1-lirongqing@baidu.com
>> [...]
> 
> Aha, Petr asked similar question during his review. Thanks for the catch!
> 
> How about following fixup patch to restore that part of logic?
> 
> Thanks,
> Feng
> 
> ---
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 5b3a7785d3a2..d2254c91450b 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -223,8 +223,11 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
>   }
>   #endif
>   
> -static void check_hung_task(struct task_struct *t, unsigned long timeout)
> +static void check_hung_task(struct task_struct *t, unsigned long timeout,
> +		unsigned long prev_detect_count)
>   {
> +	unsigned long total_hung_task;
> +
>   	if (!task_is_hung(t, timeout))
>   		return;
>   
> @@ -234,13 +237,19 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>   	 */
>   	sysctl_hung_task_detect_count++;
>   
> +	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
>   	trace_sched_process_hang(t);
>   
> +	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
> +		console_verbose();
> +		hung_task_call_panic = true;
> +	}
> +
>   	/*
>   	 * Ok, the task did not get scheduled for more than 2 minutes,
>   	 * complain:
>   	 */
> -	if (sysctl_hung_task_warnings) {
> +	if (sysctl_hung_task_warnings || hung_task_call_panic) {
>   		if (sysctl_hung_task_warnings > 0)
>   			sysctl_hung_task_warnings--;
>   		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
> @@ -295,7 +304,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
>   {
>   	int max_count = sysctl_hung_task_check_count;
>   	unsigned long last_break = jiffies;
> -	unsigned long total_hung_task;
>   	struct task_struct *g, *t;
>   	unsigned long prev_detect_count = sysctl_hung_task_detect_count;
>   	int need_warning = sysctl_hung_task_warnings;
> @@ -320,20 +328,14 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
>   			last_break = jiffies;
>   		}
>   
> -		check_hung_task(t, timeout);
> +		check_hung_task(t, timeout, prev_detect_count);
>   	}
>    unlock:
>   	rcu_read_unlock();
>   
> -	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
> -	if (!total_hung_task)
> +	if (!(sysctl_hung_task_detect_count - prev_detect_count))
>   		return;
>   
> -	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
> -		console_verbose();
> -		hung_task_call_panic = true;
> -	}
> -
>   	if (need_warning || hung_task_call_panic) {
>   		si_mask |= SYS_INFO_LOCKS;

Looks good to me now! I assume v3 would be expected, can you
post a new version?

Cheers,
Lance
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Feng Tang 1 month ago
On Sun, Nov 16, 2025 at 09:22:43PM +0800, Lance Yang wrote:
> > > Looking at the history:
> > > 
> > > 1) Commit ("hung_task: ignore hung_task_warnings when hung_task_panic
> > > is enabled")[1] ensured that hung task information is always dumped
> > > when a panic is configured, even if the warning counter is exhausted.
> > > 
> > > 2) Later, commit ("hung_task: panic when there are more than N hung
> > > tasks at the same time")[2] refined the logic to trigger a panic based
> > > on the number of hung tasks found in a single scan.
> > > 
> > > To stay consistent with the established behavior, I think we should
> > > continue to dump the information for hung tasks as long as
> > > sysctl_hung_task_panic is enabled :)
> > > 
> > > [1] https://lore.kernel.org/all/20240613033159.3446265-1-leonylgao@gmail.com
> > > [2] https://lore.kernel.org/all/20251015063615.2632-1-lirongqing@baidu.com
> > > [...]
> > 
> > Aha, Petr asked similar question during his review. Thanks for the catch!
> > 
> > How about following fixup patch to restore that part of logic?
> > 
> > Thanks,
> > Feng
> > 
> > ---
> > diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> > index 5b3a7785d3a2..d2254c91450b 100644
> > --- a/kernel/hung_task.c
> > +++ b/kernel/hung_task.c
> > @@ -223,8 +223,11 @@ static inline void debug_show_blocker(struct task_struct *task, unsigned long ti
> >   }
> >   #endif
> > -static void check_hung_task(struct task_struct *t, unsigned long timeout)
> > +static void check_hung_task(struct task_struct *t, unsigned long timeout,
> > +		unsigned long prev_detect_count)
> >   {
> > +	unsigned long total_hung_task;
> > +
> >   	if (!task_is_hung(t, timeout))
> >   		return;
> > @@ -234,13 +237,19 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
> >   	 */
> >   	sysctl_hung_task_detect_count++;
> > +	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
> >   	trace_sched_process_hang(t);
> > +	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
> > +		console_verbose();
> > +		hung_task_call_panic = true;
> > +	}
> > +
> >   	/*
> >   	 * Ok, the task did not get scheduled for more than 2 minutes,
> >   	 * complain:
> >   	 */
> > -	if (sysctl_hung_task_warnings) {
> > +	if (sysctl_hung_task_warnings || hung_task_call_panic) {
> >   		if (sysctl_hung_task_warnings > 0)
> >   			sysctl_hung_task_warnings--;
> >   		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
> > @@ -295,7 +304,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> >   {
> >   	int max_count = sysctl_hung_task_check_count;
> >   	unsigned long last_break = jiffies;
> > -	unsigned long total_hung_task;
> >   	struct task_struct *g, *t;
> >   	unsigned long prev_detect_count = sysctl_hung_task_detect_count;
> >   	int need_warning = sysctl_hung_task_warnings;
> > @@ -320,20 +328,14 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> >   			last_break = jiffies;
> >   		}
> > -		check_hung_task(t, timeout);
> > +		check_hung_task(t, timeout, prev_detect_count);
> >   	}
> >    unlock:
> >   	rcu_read_unlock();
> > -	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
> > -	if (!total_hung_task)
> > +	if (!(sysctl_hung_task_detect_count - prev_detect_count))
> >   		return;
> > -	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
> > -		console_verbose();
> > -		hung_task_call_panic = true;
> > -	}
> > -
> >   	if (need_warning || hung_task_call_panic) {
> >   		si_mask |= SYS_INFO_LOCKS;
> 
> Looks good to me now! I assume v3 would be expected, can you
> post a new version?

Andrew has taken the patchset into the -mm tree.

Andrew, which way do you prefer? Should I send a v3 patch for hung-task, or
will you pick up the fixup patch and squash it into the original 0002 patch?

Anyway, I have made a squashed v3 version of the patch below.

Thanks,
Feng

---
From f90a60dae2440c89da7151fb1ddac022b872fb69 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@linux.alibaba.com>
Date: Wed, 5 Nov 2025 19:30:36 +0800
Subject: [PATCH v3] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung

When task-hung happens, developers may need different kinds of system
information (call-stacks, memory info, locks, etc.) to help debugging.

Add 'hung_task_sys_info' sysctl knob to take human readable string like
"tasks,mem,timers,locks,ftrace,...", and when task-hung happens, all
requested information will be dumped. (refer kernel/sys_info.c for more
details).

Meanwhile, the newly introduced sys_info() call is used to unify some
existing info-dumping knobs.

Suggested-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
---
Changelog:

  v3:
  * restore hung_task_call_panic logic (Lance)

  v2:
  * code cleanup for si_mask setup (Petr)

 Documentation/admin-guide/sysctl/kernel.rst |  5 +++
 kernel/hung_task.c                          | 40 ++++++++++++++-------
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index a397eeccaea7..45b4408dad31 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -422,6 +422,11 @@ the system boot.
 
 This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
 
+hung_task_sys_info
+==================
+A comma separated list of extra system information to be dumped when
+hung task is detected, for example, "tasks,mem,timers,locks,...".
+Refer 'panic_sys_info' section below for more details.
 
 hung_task_timeout_secs
 ======================
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 5ac0e66a1361..d2254c91450b 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -24,6 +24,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/hung_task.h>
 #include <linux/rwsem.h>
+#include <linux/sys_info.h>
 
 #include <trace/events/sched.h>
 
@@ -59,12 +60,17 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
 static int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
-static bool hung_task_show_lock;
 static bool hung_task_call_panic;
-static bool hung_task_show_all_bt;
 
 static struct task_struct *watchdog_task;
 
+/*
+ * A bitmask to control what kinds of system info to be printed when
+ * a hung task is detected, it could be task, memory, lock etc. Refer
+ * include/linux/sys_info.h for detailed bit definition.
+ */
+static unsigned long hung_task_si_mask;
+
 #ifdef CONFIG_SMP
 /*
  * Should we dump all CPUs backtraces in a hung task event?
@@ -236,7 +242,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
 
 	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
 		console_verbose();
-		hung_task_show_lock = true;
 		hung_task_call_panic = true;
 	}
 
@@ -259,10 +264,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
 			" disables this message.\n");
 		sched_show_task(t);
 		debug_show_blocker(t, timeout);
-		hung_task_show_lock = true;
 
-		if (sysctl_hung_task_all_cpu_backtrace)
-			hung_task_show_all_bt = true;
 		if (!sysctl_hung_task_warnings)
 			pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
 	}
@@ -304,6 +306,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 	unsigned long last_break = jiffies;
 	struct task_struct *g, *t;
 	unsigned long prev_detect_count = sysctl_hung_task_detect_count;
+	int need_warning = sysctl_hung_task_warnings;
+	unsigned long si_mask = hung_task_si_mask;
 
 	/*
 	 * If the system crashed already then all bets are off,
@@ -312,7 +316,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 	if (test_taint(TAINT_DIE) || did_panic)
 		return;
 
-	hung_task_show_lock = false;
+
 	rcu_read_lock();
 	for_each_process_thread(g, t) {
 
@@ -328,14 +332,19 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 	}
  unlock:
 	rcu_read_unlock();
-	if (hung_task_show_lock)
-		debug_show_all_locks();
 
-	if (hung_task_show_all_bt) {
-		hung_task_show_all_bt = false;
-		trigger_all_cpu_backtrace();
+	if (!(sysctl_hung_task_detect_count - prev_detect_count))
+		return;
+
+	if (need_warning || hung_task_call_panic) {
+		si_mask |= SYS_INFO_LOCKS;
+
+		if (sysctl_hung_task_all_cpu_backtrace)
+			si_mask |= SYS_INFO_ALL_BT;
 	}
 
+	sys_info(si_mask);
+
 	if (hung_task_call_panic)
 		panic("hung_task: blocked tasks");
 }
@@ -434,6 +443,13 @@ static const struct ctl_table hung_task_sysctls[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+	{
+		.procname	= "hung_task_sys_info",
+		.data		= &hung_task_si_mask,
+		.maxlen         = sizeof(hung_task_si_mask),
+		.mode		= 0644,
+		.proc_handler	= sysctl_sys_info_handler,
+	},
 };
 
 static void __init hung_task_sysctl_init(void)
-- 
2.43.5
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Andrew Morton 1 month ago
On Sun, 16 Nov 2025 22:13:58 +0800 Feng Tang <feng.tang@linux.alibaba.com> wrote:

> > >   	if (need_warning || hung_task_call_panic) {
> > >   		si_mask |= SYS_INFO_LOCKS;
> > 
> > Looks good to me now! I assume v3 would be expected, can you
> > post a new version?
> 
> Andrew has taken the patchset to -mm tree. 
> 
> Andrew, which way do you prefer? I send a v3 patch for hung-task or you
> pick up the fixup patch and squash it into the original 0002 patch?
> 
> Anyway, I make a squashed version v3 patch below.

I prefer little fixup patches, generally.  So people can see what
changed and don't feel they should re-review everything.

I queued the below, thanks.


From: Feng Tang <feng.tang@linux.alibaba.com>
Subject: hung_task-add-hung_task_sys_info-sysctl-to-dump-sys-info-on-task-hung-fix
Date: Wed, 5 Nov 2025 19:30:36 +0800

Maintain consistency with the established behavior, per Lance and Petr.

Link: https://lkml.kernel.org/r/aRncJo1mA5Zk77Hr@U-2FWC9VHC-2323.local
Suggested-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 kernel/hung_task.c |   24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

--- a/kernel/hung_task.c~hung_task-add-hung_task_sys_info-sysctl-to-dump-sys-info-on-task-hung-fix
+++ a/kernel/hung_task.c
@@ -223,8 +223,11 @@ static inline void debug_show_blocker(st
 }
 #endif
 
-static void check_hung_task(struct task_struct *t, unsigned long timeout)
+static void check_hung_task(struct task_struct *t, unsigned long timeout,
+		unsigned long prev_detect_count)
 {
+	unsigned long total_hung_task;
+
 	if (!task_is_hung(t, timeout))
 		return;
 
@@ -234,13 +237,19 @@ static void check_hung_task(struct task_
 	 */
 	sysctl_hung_task_detect_count++;
 
+	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
 	trace_sched_process_hang(t);
 
+	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
+		console_verbose();
+		hung_task_call_panic = true;
+	}
+
 	/*
 	 * Ok, the task did not get scheduled for more than 2 minutes,
 	 * complain:
 	 */
-	if (sysctl_hung_task_warnings) {
+	if (sysctl_hung_task_warnings || hung_task_call_panic) {
 		if (sysctl_hung_task_warnings > 0)
 			sysctl_hung_task_warnings--;
 		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
@@ -295,7 +304,6 @@ static void check_hung_uninterruptible_t
 {
 	int max_count = sysctl_hung_task_check_count;
 	unsigned long last_break = jiffies;
-	unsigned long total_hung_task;
 	struct task_struct *g, *t;
 	unsigned long prev_detect_count = sysctl_hung_task_detect_count;
 	int need_warning = sysctl_hung_task_warnings;
@@ -320,20 +328,14 @@ static void check_hung_uninterruptible_t
 			last_break = jiffies;
 		}
 
-		check_hung_task(t, timeout);
+		check_hung_task(t, timeout, prev_detect_count);
 	}
  unlock:
 	rcu_read_unlock();
 
-	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
-	if (!total_hung_task)
+	if (!(sysctl_hung_task_detect_count - prev_detect_count))
 		return;
 
-	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
-		console_verbose();
-		hung_task_call_panic = true;
-	}
-
 	if (need_warning || hung_task_call_panic) {
 		si_mask |= SYS_INFO_LOCKS;
 
_
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Petr Mladek 1 month ago
On Mon 2025-11-17 09:53:52, Andrew Morton wrote:
> On Sun, 16 Nov 2025 22:13:58 +0800 Feng Tang <feng.tang@linux.alibaba.com> wrote:
> 
> > > >   	if (need_warning || hung_task_call_panic) {
> > > >   		si_mask |= SYS_INFO_LOCKS;
> > > 
> > > Looks good to me now! I assume v3 would be expected, can you
> > > post a new version?
> > 
> > Andrew has taken the patchset to -mm tree. 
> > 
> > Andrew, which way do you prefer? I send a v3 patch for hung-task or you
> > pick up the fixup patch and squash it into the original 0002 patch?
> > 
> > Anyway, I make a squashed version v3 patch below.
> 
> I prefer little fixup patches, generally.  So people can see what
> changed and don't feel they should re-review everything.
> 
> I queued the below, thanks.
> 
> From: Feng Tang <feng.tang@linux.alibaba.com>
> Subject: hung_task-add-hung_task_sys_info-sysctl-to-dump-sys-info-on-task-hung-fix
> Date: Wed, 5 Nov 2025 19:30:36 +0800
> 
> maintain consistency with established behavior, per Lance and Petr
> 
> Link: https://lkml.kernel.org/r/aRncJo1mA5Zk77Hr@U-2FWC9VHC-2323.local
> Suggested-by: Petr Mladek <pmladek@suse.com>
> Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: Lance Yang <ioworker0@gmail.com>
> Cc: "Paul E . McKenney" <paulmck@kernel.org>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Thanks a lot for catching and fixing the regression caused
by this patchset. The patch looks good.

See a comment below.

> --- a/kernel/hung_task.c~hung_task-add-hung_task_sys_info-sysctl-to-dump-sys-info-on-task-hung-fix
> +++ a/kernel/hung_task.c
> @@ -223,8 +223,11 @@ static inline void debug_show_blocker(st
>  }
>  #endif
>  
> -static void check_hung_task(struct task_struct *t, unsigned long timeout)
> +static void check_hung_task(struct task_struct *t, unsigned long timeout,
> +		unsigned long prev_detect_count)
>  {
> +	unsigned long total_hung_task;
> +
>  	if (!task_is_hung(t, timeout))
>  		return;
>  
> @@ -234,13 +237,19 @@ static void check_hung_task(struct task_
>  	 */
>  	sysctl_hung_task_detect_count++;
>  
> +	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
>  	trace_sched_process_hang(t);
>  
> +	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
> +		console_verbose();
> +		hung_task_call_panic = true;
> +	}
> +
>  	/*
>  	 * Ok, the task did not get scheduled for more than 2 minutes,
>  	 * complain:
>  	 */
> -	if (sysctl_hung_task_warnings) {
> +	if (sysctl_hung_task_warnings || hung_task_call_panic) {
>  		if (sysctl_hung_task_warnings > 0)
>  			sysctl_hung_task_warnings--;
>  		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",

This restores the behavior after the commit 9544f9e6947f6508
("hung_task: panic when there are more than N hung tasks at
the same time"). It is better than nothing.

Well, the behavior is still not ideal. It would be better if
we printed backtraces from _all_ "hung" tasks before panicking.
But it prints the backtraces only when sysctl_hung_task_panic
limit is reached.

I mean, for example, let's have:

  + sysctl_hung_task_warnings = 2;
  + sysctl_hung_task_panic = 5;
  + and detect 6 hung tasks.

The code will report 1st and 2nd hung tasks. It will skip 3rd and 4th
because sysctl_hung_task_warnings reached 0. It will report 5th and
6th tasks because (total_hung_task >= 5).

It is better than nothing. But it might be confusing.

I am not sure how to fix it. A minimalist solution would be to print
a warning. Something like:

	if (sysctl_hung_task_panic > 1 &&
	    (total_hung_task == sysctl_hung_task_panic) &&
	    !sysctl_hung_task_warnings) {
		pr_err("INFO: %d blocked tasks might have been skipped because reached hung_task_warnings limit\n",
			sysctl_hung_task_panic - 1);

Or we could print the "total_hung_task" counter somewhere, for
example,

		pr_err("INFO[%lu]: task %s:%d blocked for more than %ld seconds.\n",
			total_hung_task, ...

Or we could restart the for_each_process_thread() cycle and make sure
that all hung tasks will get reported.

Or we could ignore it until anyone complains.

Best Regards,
Petr
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Lance Yang 1 month ago

On 2025/11/18 23:20, Petr Mladek wrote:
> On Mon 2025-11-17 09:53:52, Andrew Morton wrote:
>> On Sun, 16 Nov 2025 22:13:58 +0800 Feng Tang <feng.tang@linux.alibaba.com> wrote:
>>
>>>>>    	if (need_warning || hung_task_call_panic) {
>>>>>    		si_mask |= SYS_INFO_LOCKS;
>>>>
>>>> Looks good to me now! I assume v3 would be expected, can you
>>>> post a new version?
>>>
>>> Andrew has taken the patchset to -mm tree.
>>>
>>> Andrew, which way do you prefer? I send a v3 patch for hung-task or you
>>> pick up the fixup patch and squash it into the original 0002 patch?
>>>
>>> Anyway, I make a squashed version v3 patch below.
>>
>> I prefer little fixup patches, generally.  So people can see what
>> changed and don't feel they should re-review everything.
>>
>> I queued the below, thanks.
>>
>> From: Feng Tang <feng.tang@linux.alibaba.com>
>> Subject: hung_task-add-hung_task_sys_info-sysctl-to-dump-sys-info-on-task-hung-fix
>> Date: Wed, 5 Nov 2025 19:30:36 +0800
>>
>> maintain consistency with established behavior, per Lance and Petr
>>
>> Link: https://lkml.kernel.org/r/aRncJo1mA5Zk77Hr@U-2FWC9VHC-2323.local
>> Suggested-by: Petr Mladek <pmladek@suse.com>
>> Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
>> Cc: Jonathan Corbet <corbet@lwn.net>
>> Cc: Lance Yang <ioworker0@gmail.com>
>> Cc: "Paul E . McKenney" <paulmck@kernel.org>
>> Cc: Steven Rostedt <rostedt@goodmis.org>
>> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> 
> Thanks a lot for catching and fixing the regression caused
> by this patchset. The patch looks good.
> 
> See a comment below.
> 
>> --- a/kernel/hung_task.c~hung_task-add-hung_task_sys_info-sysctl-to-dump-sys-info-on-task-hung-fix
>> +++ a/kernel/hung_task.c
>> @@ -223,8 +223,11 @@ static inline void debug_show_blocker(st
>>   }
>>   #endif
>>   
>> -static void check_hung_task(struct task_struct *t, unsigned long timeout)
>> +static void check_hung_task(struct task_struct *t, unsigned long timeout,
>> +		unsigned long prev_detect_count)
>>   {
>> +	unsigned long total_hung_task;
>> +
>>   	if (!task_is_hung(t, timeout))
>>   		return;
>>   
>> @@ -234,13 +237,19 @@ static void check_hung_task(struct task_
>>   	 */
>>   	sysctl_hung_task_detect_count++;
>>   
>> +	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
>>   	trace_sched_process_hang(t);
>>   
>> +	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
>> +		console_verbose();
>> +		hung_task_call_panic = true;
>> +	}
>> +
>>   	/*
>>   	 * Ok, the task did not get scheduled for more than 2 minutes,
>>   	 * complain:
>>   	 */
>> -	if (sysctl_hung_task_warnings) {
>> +	if (sysctl_hung_task_warnings || hung_task_call_panic) {
>>   		if (sysctl_hung_task_warnings > 0)
>>   			sysctl_hung_task_warnings--;
>>   		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
> 
> This restores the behavior after the commit 9544f9e6947f6508
> ("hung_task: panic when there are more than N hung tasks at
> the same time"). It is better than nothing.
> 
> Well, the behavior is still not ideal. It would be better when
> we printed backtraces from _all_ "hung" tasks before panicking.
> But it prints the backtraces only when sysctl_hung_task_panic
> limit is reached.
> 
> I mean, for example, let's have:
> 
>    + sysctl_hung_task_warnings = 2;
>    + sysctl_hung_task_panic = 5;
>    + and detect 6 hung tasks.
> 
> The code will report 1st and 2nd hung tasks. It will skip 3rd and 4th
> because sysctl_hung_task_warnings reached 0. It will report 5th and
> 6th tasks because (total_hung_task >= 5).
> 
> It is better than nothing. But it might be confusing.

Right, I can see how it might be confusing.

IMHO, sysctl_hung_task_warnings is a user-configured limit on verbosity.
It makes sense that reports are suppressed after the limit is exhausted,
except when the sysctl_hung_task_panic threshold is reached ;)

> 
> I am not sure how to fix it. A minimalist solution would be to print
> a warning. Something like:
> 
> 	if (sysctl_hung_task_panic > 1 &&
> 	    (total_hung_task == sysctl_hung_task_panic) &&
> 	    !sysctl_hung_task_warnings) {
> 		pr_err("INFO: %d blocked tasks might have been skipped because reached hung_task_warnings limit\n",
> 			sysctl_hung_task_panic - 1);
> 
> Or we could print the "total_hung_task" counter somewhere, for
> example,
> 
> 		pr_err("INFO[%lu]: task %s:%d blocked for more than %ld seconds.\n",
> 			total_hung_task, ...
> 
> Or we could restart the for_each_process_thread() cycle and make sure
> that all hung tasks will get reported.
> 
> Or we could ignore it until anyone complains.

It looks like we already inform the user when that happens. When
sysctl_hung_task_warnings is finally decremented to zero, the code prints:


```
if (!sysctl_hung_task_warnings)
	pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
```

Given that this explicit warning is already in place, perhaps the current
behavior is sufficient and clear enough?

Thanks,
Lance
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Petr Mladek 1 month ago
On Wed 2025-11-19 01:57:36, Lance Yang wrote:
> On 2025/11/18 23:20, Petr Mladek wrote:
> > Well, the behavior is still not ideal. It would be better when
> > we printed backtraces from _all_ "hung" tasks before panicking.
> > But it prints the backtraces only when sysctl_hung_task_panic
> > limit is reached.
> > 
> > I mean, for example, let's have:
> > 
> >    + sysctl_hung_task_warnings = 2;
> >    + sysctl_hung_task_panic = 5;
> >    + and detect 6 hung tasks.
> > 
> > The code will report 1st and 2nd hung tasks. It will skip 3rd and 4th
> > because sysctl_hung_task_warnings reached 0. It will report 5th and
> > 6th tasks because (total_hung_task >= 5).
> > 
> > It is better than nothing. But it might be confusing.
> 
> Right, I can see how it might be confusing.
> 
> IMHO, sysctl_hung_task_warnings is a user-configured limit on verbosity.
> It makes sense that reports are suppressed after the limit is exhausted,
> except when the sysctl_hung_task_panic threshold is reached ;)
> 
> > I am not sure how to fix it. A minimalist solution would be to print
> > a warning. Something like:
> > 
> > 	if (sysctl_hung_task_panic > 1 &&
> > 	    (total_hung_task == sysctl_hung_task_panic) &&
> > 	    !sysctl_hung_task_warnings) {
> > 		pr_err("INFO: %d blocked tasks might have been skipped because reached hung_task_warnings limit\n",
> > 			sysctl_hung_task_panic - 1);
> > 
> > Or we could print the "total_hung_task" counter somewhere, for
> > example,
> > 
> > 		pr_err("INFO[%lu]: task %s:%d blocked for more than %ld seconds.\n",
> > 			total_hung_task, ...
> > 
> > Or we could restart the for_each_process_thread() cycle and make sure
> > that all hung tasks will get reported.
> > 
> > Or we could ignore it until anyone complains.
> 
> It looks like we already inform the user when that happens. When
> sysctl_hung_task_warnings is finally decremented to zero, the code prints:
> 
> ```
> if (!sysctl_hung_task_warnings)
> 	pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
> ```
> 
> Given that this explicit warning is already in place, perhaps the current
> behavior is sufficient and clear enough?

The warning might get lost, or it might happen a long time before
the critical stall, so people might miss it.

But you are right. There is a warning. And my worries are rather
theoretical. Let's keep the code simple until anyone complains.

Best Regards,
Petr
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Lance Yang 1 month ago

On 2025/11/18 01:53, Andrew Morton wrote:
> On Sun, 16 Nov 2025 22:13:58 +0800 Feng Tang <feng.tang@linux.alibaba.com> wrote:
> 
>>>>    	if (need_warning || hung_task_call_panic) {
>>>>    		si_mask |= SYS_INFO_LOCKS;
>>>
>>> Looks good to me now! I assume v3 would be expected, can you
>>> post a new version?
>>
>> Andrew has taken the patchset to -mm tree.
>>
>> Andrew, which way do you prefer? I send a v3 patch for hung-task or you
>> pick up the fixup patch and squash it into the original 0002 patch?
>>
>> Anyway, I make a squashed version v3 patch below.
> 
> I prefer little fixup patches, generally.  So people can see what
> changed and don't feel they should re-review everything.
> 
> I queued the below, thanks.

Thanks!

> 
> 
> From: Feng Tang <feng.tang@linux.alibaba.com>
> Subject: hung_task-add-hung_task_sys_info-sysctl-to-dump-sys-info-on-task-hung-fix
> Date: Wed, 5 Nov 2025 19:30:36 +0800
> 
> maintain consistency with established behavior, per Lance and Petr
> 
> Link: https://lkml.kernel.org/r/aRncJo1mA5Zk77Hr@U-2FWC9VHC-2323.local
> Suggested-by: Petr Mladek <pmladek@suse.com>
> Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: Lance Yang <ioworker0@gmail.com>
> Cc: "Paul E . McKenney" <paulmck@kernel.org>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---

With this fix, #02 patch looks good to me!

Reviewed-by: Lance Yang <lance.yang@linux.dev>

> 
>   kernel/hung_task.c |   24 +++++++++++++-----------
>   1 file changed, 13 insertions(+), 11 deletions(-)
> 
> --- a/kernel/hung_task.c~hung_task-add-hung_task_sys_info-sysctl-to-dump-sys-info-on-task-hung-fix
> +++ a/kernel/hung_task.c
> @@ -223,8 +223,11 @@ static inline void debug_show_blocker(st
>   }
>   #endif
>   
> -static void check_hung_task(struct task_struct *t, unsigned long timeout)
> +static void check_hung_task(struct task_struct *t, unsigned long timeout,
> +		unsigned long prev_detect_count)
>   {
> +	unsigned long total_hung_task;
> +
>   	if (!task_is_hung(t, timeout))
>   		return;
>   
> @@ -234,13 +237,19 @@ static void check_hung_task(struct task_
>   	 */
>   	sysctl_hung_task_detect_count++;
>   
> +	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
>   	trace_sched_process_hang(t);
>   
> +	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
> +		console_verbose();
> +		hung_task_call_panic = true;
> +	}
> +
>   	/*
>   	 * Ok, the task did not get scheduled for more than 2 minutes,
>   	 * complain:
>   	 */
> -	if (sysctl_hung_task_warnings) {
> +	if (sysctl_hung_task_warnings || hung_task_call_panic) {
>   		if (sysctl_hung_task_warnings > 0)
>   			sysctl_hung_task_warnings--;
>   		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
> @@ -295,7 +304,6 @@ static void check_hung_uninterruptible_t
>   {
>   	int max_count = sysctl_hung_task_check_count;
>   	unsigned long last_break = jiffies;
> -	unsigned long total_hung_task;
>   	struct task_struct *g, *t;
>   	unsigned long prev_detect_count = sysctl_hung_task_detect_count;
>   	int need_warning = sysctl_hung_task_warnings;
> @@ -320,20 +328,14 @@ static void check_hung_uninterruptible_t
>   			last_break = jiffies;
>   		}
>   
> -		check_hung_task(t, timeout);
> +		check_hung_task(t, timeout, prev_detect_count);
>   	}
>    unlock:
>   	rcu_read_unlock();
>   
> -	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
> -	if (!total_hung_task)
> +	if (!(sysctl_hung_task_detect_count - prev_detect_count))
>   		return;
>   
> -	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
> -		console_verbose();
> -		hung_task_call_panic = true;
> -	}
> -
>   	if (need_warning || hung_task_call_panic) {
>   		si_mask |= SYS_INFO_LOCKS;
>   
> _
>
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Feng Tang 1 month ago
On Mon, Nov 17, 2025 at 09:53:52AM -0800, Andrew Morton wrote:
> On Sun, 16 Nov 2025 22:13:58 +0800 Feng Tang <feng.tang@linux.alibaba.com> wrote:
> 
> > > >   	if (need_warning || hung_task_call_panic) {
> > > >   		si_mask |= SYS_INFO_LOCKS;
> > > 
> > > Looks good to me now! I assume v3 would be expected, can you
> > > post a new version?
> > 
> > Andrew has taken the patchset to -mm tree. 
> > 
> > Andrew, which way do you prefer? I send a v3 patch for hung-task or you
> > pick up the fixup patch and squash it into the original 0002 patch?
> > 
> > Anyway, I make a squashed version v3 patch below.
> 
> I prefer little fixup patches, generally.  So people can see what
> changed and don't feel they should re-review everything.

I see now.

> I queued the below, thanks.
 
Thank you! I just run some tests with latest mm tree and they all passed.

- Feng
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Petr Mladek 1 month ago
On Thu 2025-11-13 19:10:37, Feng Tang wrote:
> When task-hung happens, developers may need different kinds of system
> information (call-stacks, memory info, locks, etc.) to help debugging.
> 
> Add 'hung_task_sys_info' sysctl knob to take human readable string like
> "tasks,mem,timers,locks,ftrace,...", and when task-hung happens, all
> requested information will be dumped. (refer kernel/sys_info.c for more
> details).
> 
> Meanwhile, the newly introduced sys_info() call is used to unify some
> existing info-dumping knobs.
> 
> Suggested-by: Petr Mladek <pmladek@suse.com>
> Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>

It would have been better to split the refactoring (moving some logic
from check_hung_task()) into a separate patch.

But the result looks good. Feel free to use:

Reviewed-by: Petr Mladek <pmladek@suse.com>

Best Regards,
Petr
Re: [PATCH v2 2/4] hung_task: Add hung_task_sys_info sysctl to dump sys info on task-hung
Posted by Feng Tang 1 month ago
On Fri, Nov 14, 2025 at 04:36:39PM +0100, Petr Mladek wrote:
> On Thu 2025-11-13 19:10:37, Feng Tang wrote:
> > When task-hung happens, developers may need different kinds of system
> > information (call-stacks, memory info, locks, etc.) to help debugging.
> > 
> > Add 'hung_task_sys_info' sysctl knob to take human readable string like
> > "tasks,mem,timers,locks,ftrace,...", and when task-hung happens, all
> > requested information will be dumped. (refer kernel/sys_info.c for more
> > details).
> > 
> > Meanwhile, the newly introduced sys_info() call is used to unify some
> > existing info-dumping knobs.
> > 
> > Suggested-by: Petr Mladek <pmladek@suse.com>
> > Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
> 
> It would have been better to split the refactoring (moving some logic
> from check_hung_task()) into a separate patch.

Yes, it would be cleaner to have a functional patch and a cleanup one.
Will pay more attention in the future.

> But the result looks good. Feel free to use:
> 
> Reviewed-by: Petr Mladek <pmladek@suse.com>
 
Thank you!

- Feng