kernel/hung_task.c | 78 +++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 37 deletions(-)
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Dump the lock blocker task if it is not hung because if the blocker
task is also hung, it should be dumped by the detector. This will
de-duplicate the same stackdumps if the blocker task is also blocked
by another task (and hung).
Suggested-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Lance Yang <lance.yang@linux.dev>
---
Changes in v2:
- Rebase on next-20250730
- Merge task state checks.
- Add Lance's ack.
---
kernel/hung_task.c | 78 +++++++++++++++++++++++++++-------------------------
1 file changed, 41 insertions(+), 37 deletions(-)
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 8708a1205f82..b2c1f14b8129 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -95,9 +95,41 @@ static struct notifier_block panic_block = {
.notifier_call = hung_task_panic,
};
+static bool task_is_hung(struct task_struct *t, unsigned long timeout)
+{
+ unsigned long switch_count = t->nvcsw + t->nivcsw;
+ unsigned int state = READ_ONCE(t->__state);
+
+ /*
+ * skip the TASK_KILLABLE tasks -- these can be killed
+ * skip the TASK_IDLE tasks -- those are genuinely idle
+ * skip the TASK_FROZEN task -- it reasonably stops scheduling by freezer
+ */
+ if (!(state & TASK_UNINTERRUPTIBLE) ||
+ (state & (TASK_WAKEKILL | TASK_NOLOAD | TASK_FROZEN)))
+ return false;
+
+ /*
+ * When a freshly created task is scheduled once, changes its state to
+ * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
+ * musn't be checked.
+ */
+ if (unlikely(!switch_count))
+ return false;
+
+ if (switch_count != t->last_switch_count) {
+ t->last_switch_count = switch_count;
+ t->last_switch_time = jiffies;
+ return false;
+ }
+ if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+ return false;
+
+ return true;
+}
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
-static void debug_show_blocker(struct task_struct *task)
+static void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
struct task_struct *g, *t;
unsigned long owner, blocker, blocker_type;
@@ -174,41 +206,21 @@ static void debug_show_blocker(struct task_struct *task)
t->pid, rwsem_blocked_by);
break;
}
- sched_show_task(t);
+ /* Avoid duplicated task dump, skip if the task is also hung. */
+ if (!task_is_hung(t, timeout))
+ sched_show_task(t);
return;
}
}
#else
-static inline void debug_show_blocker(struct task_struct *task)
+static inline void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
}
#endif
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
- unsigned long switch_count = t->nvcsw + t->nivcsw;
-
- /*
- * Ensure the task is not frozen.
- * Also, skip vfork and any other user process that freezer should skip.
- */
- if (unlikely(READ_ONCE(t->__state) & TASK_FROZEN))
- return;
-
- /*
- * When a freshly created task is scheduled once, changes its state to
- * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
- * musn't be checked.
- */
- if (unlikely(!switch_count))
- return;
-
- if (switch_count != t->last_switch_count) {
- t->last_switch_count = switch_count;
- t->last_switch_time = jiffies;
- return;
- }
- if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+ if (!task_is_hung(t, timeout))
return;
/*
@@ -243,7 +255,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
- debug_show_blocker(t);
+ debug_show_blocker(t, timeout);
hung_task_show_lock = true;
if (sysctl_hung_task_all_cpu_backtrace)
@@ -299,7 +311,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
hung_task_show_lock = false;
rcu_read_lock();
for_each_process_thread(g, t) {
- unsigned int state;
if (!max_count--)
goto unlock;
@@ -308,15 +319,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
goto unlock;
last_break = jiffies;
}
- /*
- * skip the TASK_KILLABLE tasks -- these can be killed
- * skip the TASK_IDLE tasks -- those are genuinely idle
- */
- state = READ_ONCE(t->__state);
- if ((state & TASK_UNINTERRUPTIBLE) &&
- !(state & TASK_WAKEKILL) &&
- !(state & TASK_NOLOAD))
- check_hung_task(t, timeout);
+
+ check_hung_task(t, timeout);
}
unlock:
rcu_read_unlock();
On (25/07/31 07:11), Masami Hiramatsu (Google) wrote: > From: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > Dump the lock blocker task if it is not hung because if the blocker > task is also hung, it should be dumped by the detector. This will > de-duplicate the same stackdumps if the blocker task is also blocked > by another task (and hung). > > Suggested-by: Sergey Senozhatsky <senozhatsky@chromium.org> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > Acked-by: Lance Yang <lance.yang@linux.dev> Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org> Wrote a simple 3 tasks circular lock test (same that I had in real life). The output looks good: [ 90.985431] [ T140] INFO: task T1:525 blocked for more than 30 seconds. [ 90.990981] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 [ 90.996912] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 91.001617] [ T140] task:T1 state:D stack:0 pid:525 tgid:525 ppid:2 task_flags:0x208040 flags:0x00004000 [ 91.007564] [ T140] Call Trace: [ 91.008767] [ T140] <TASK> [ 91.009872] [ T140] __schedule+0x137f/0x20c0 [ 91.011606] [ T140] schedule+0xdc/0x280 [ 91.013115] [ T140] schedule_preempt_disabled+0x10/0x20 [ 91.015200] [ T140] __mutex_lock+0x721/0x1590 [ 91.016817] [ T140] ? __mutex_lock+0x500/0x1590 [ 91.018074] [ T140] mutex_lock+0x81/0x90 [ 91.019169] [ T140] t1+0x62/0x70 [ 91.020061] [ T140] kthread+0x583/0x6e0 [ 91.021140] [ T140] ? drop_caches_sysctl_handler+0x130/0x130 [ 91.022729] [ T140] ? kthread_blkcg+0xa0/0xa0 [ 91.023921] [ T140] ret_from_fork+0xc8/0x160 [ 91.025091] [ T140] ? kthread_blkcg+0xa0/0xa0 [ 91.026309] [ T140] ret_from_fork_asm+0x11/0x20 [ 91.027577] [ T140] </TASK> [ 91.028326] [ T140] INFO: task T1:525 is blocked on a mutex likely owned by task T2:526. [ 91.030404] [ T140] INFO: task T2:526 blocked for more than 30 seconds. [ 91.031978] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 [ 91.034069] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 91.036215] [ T140] task:T2 state:D stack:0 pid:526 tgid:526 ppid:2 task_flags:0x208040 flags:0x00004000 [ 91.039233] [ T140] Call Trace: [ 91.040084] [ T140] <TASK> [ 91.040810] [ T140] __schedule+0x137f/0x20c0 [ 91.041954] [ T140] schedule+0xdc/0x280 [ 91.042984] [ T140] schedule_preempt_disabled+0x10/0x20 [ 91.044329] [ T140] __mutex_lock+0x721/0x1590 [ 91.045464] [ T140] ? __mutex_lock+0x500/0x1590 [ 91.046624] [ T140] mutex_lock+0x81/0x90 [ 91.047632] [ T140] t2+0x69/0x70 [ 91.048473] [ T140] kthread+0x583/0x6e0 [ 91.049453] [ T140] ? drop_pagecache_sb+0x200/0x200 [ 91.050686] [ T140] ? kthread_blkcg+0xa0/0xa0 [ 91.051808] [ T140] ret_from_fork+0xc8/0x160 [ 91.052886] [ T140] ? kthread_blkcg+0xa0/0xa0 [ 91.054036] [ T140] ret_from_fork_asm+0x11/0x20 [ 91.055205] [ T140] </TASK> [ 91.056011] [ T140] INFO: task T2:526 is blocked on a mutex likely owned by task T3:527. [ 91.058014] [ T140] INFO: task T3:527 blocked for more than 30 seconds. [ 91.059682] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 [ 91.061708] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 91.063825] [ T140] task:T3 state:D stack:0 pid:527 tgid:527 ppid:2 task_flags:0x208040 flags:0x00004000 [ 91.066668] [ T140] Call Trace: [ 91.067462] [ T140] <TASK> [ 91.068121] [ T140] __schedule+0x137f/0x20c0 [ 91.069244] [ T140] schedule+0xdc/0x280 [ 91.070264] [ T140] schedule_preempt_disabled+0x10/0x20 [ 91.071723] [ T140] __mutex_lock+0x721/0x1590 [ 91.072848] [ T140] ? __mutex_lock+0x500/0x1590 [ 91.074032] [ T140] mutex_lock+0x81/0x90 [ 91.075054] [ T140] t3+0x23/0x30 [ 91.075932] [ T140] kthread+0x583/0x6e0 [ 91.076953] [ T140] ? t2+0x70/0x70 [ 91.077865] [ T140] ? kthread_blkcg+0xa0/0xa0 [ 91.078974] [ T140] ret_from_fork+0xc8/0x160 [ 91.080109] [ T140] ? kthread_blkcg+0xa0/0xa0 [ 91.081250] [ T140] ret_from_fork_asm+0x11/0x20 [ 91.082530] [ T140] </TASK> [ 91.083248] [ T140] INFO: task T3:527 is blocked on a mutex likely owned by task T1:525.
On Thu, 31 Jul 2025 11:53:51 +0900 Sergey Senozhatsky <senozhatsky@chromium.org> wrote: > On (25/07/31 07:11), Masami Hiramatsu (Google) wrote: > > From: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > > > Dump the lock blocker task if it is not hung because if the blocker > > task is also hung, it should be dumped by the detector. This will > > de-duplicate the same stackdumps if the blocker task is also blocked > > by another task (and hung). > > > > Suggested-by: Sergey Senozhatsky <senozhatsky@chromium.org> > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > Acked-by: Lance Yang <lance.yang@linux.dev> > > Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org> > Thank you for testing! > > Wrote a simple 3 tasks circular lock test (same that I had in real > life). The output looks good: OK, it works nicely. We'll better to have a similar test (but with maximum warning message because it can crash the kernel.) Thank you, > > [ 90.985431] [ T140] INFO: task T1:525 blocked for more than 30 seconds. > [ 90.990981] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 > [ 90.996912] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. > [ 91.001617] [ T140] task:T1 state:D stack:0 pid:525 tgid:525 ppid:2 task_flags:0x208040 flags:0x00004000 > [ 91.007564] [ T140] Call Trace: > [ 91.008767] [ T140] <TASK> > [ 91.009872] [ T140] __schedule+0x137f/0x20c0 > [ 91.011606] [ T140] schedule+0xdc/0x280 > [ 91.013115] [ T140] schedule_preempt_disabled+0x10/0x20 > [ 91.015200] [ T140] __mutex_lock+0x721/0x1590 > [ 91.016817] [ T140] ? __mutex_lock+0x500/0x1590 > [ 91.018074] [ T140] mutex_lock+0x81/0x90 > [ 91.019169] [ T140] t1+0x62/0x70 > [ 91.020061] [ T140] kthread+0x583/0x6e0 > [ 91.021140] [ T140] ? drop_caches_sysctl_handler+0x130/0x130 > [ 91.022729] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.023921] [ T140] ret_from_fork+0xc8/0x160 > [ 91.025091] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.026309] [ T140] ret_from_fork_asm+0x11/0x20 > [ 91.027577] [ T140] </TASK> > [ 91.028326] [ T140] INFO: task T1:525 is blocked on a mutex likely owned by task T2:526. > [ 91.030404] [ T140] INFO: task T2:526 blocked for more than 30 seconds. > [ 91.031978] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 > [ 91.034069] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. > [ 91.036215] [ T140] task:T2 state:D stack:0 pid:526 tgid:526 ppid:2 task_flags:0x208040 flags:0x00004000 > [ 91.039233] [ T140] Call Trace: > [ 91.040084] [ T140] <TASK> > [ 91.040810] [ T140] __schedule+0x137f/0x20c0 > [ 91.041954] [ T140] schedule+0xdc/0x280 > [ 91.042984] [ T140] schedule_preempt_disabled+0x10/0x20 > [ 91.044329] [ T140] __mutex_lock+0x721/0x1590 > [ 91.045464] [ T140] ? __mutex_lock+0x500/0x1590 > [ 91.046624] [ T140] mutex_lock+0x81/0x90 > [ 91.047632] [ T140] t2+0x69/0x70 > [ 91.048473] [ T140] kthread+0x583/0x6e0 > [ 91.049453] [ T140] ? drop_pagecache_sb+0x200/0x200 > [ 91.050686] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.051808] [ T140] ret_from_fork+0xc8/0x160 > [ 91.052886] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.054036] [ T140] ret_from_fork_asm+0x11/0x20 > [ 91.055205] [ T140] </TASK> > [ 91.056011] [ T140] INFO: task T2:526 is blocked on a mutex likely owned by task T3:527. > [ 91.058014] [ T140] INFO: task T3:527 blocked for more than 30 seconds. > [ 91.059682] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 > [ 91.061708] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. > [ 91.063825] [ T140] task:T3 state:D stack:0 pid:527 tgid:527 ppid:2 task_flags:0x208040 flags:0x00004000 > [ 91.066668] [ T140] Call Trace: > [ 91.067462] [ T140] <TASK> > [ 91.068121] [ T140] __schedule+0x137f/0x20c0 > [ 91.069244] [ T140] schedule+0xdc/0x280 > [ 91.070264] [ T140] schedule_preempt_disabled+0x10/0x20 > [ 91.071723] [ T140] __mutex_lock+0x721/0x1590 > [ 91.072848] [ T140] ? __mutex_lock+0x500/0x1590 > [ 91.074032] [ T140] mutex_lock+0x81/0x90 > [ 91.075054] [ T140] t3+0x23/0x30 > [ 91.075932] [ T140] kthread+0x583/0x6e0 > [ 91.076953] [ T140] ? t2+0x70/0x70 > [ 91.077865] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.078974] [ T140] ret_from_fork+0xc8/0x160 > [ 91.080109] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.081250] [ T140] ret_from_fork_asm+0x11/0x20 > [ 91.082530] [ T140] </TASK> > [ 91.083248] [ T140] INFO: task T3:527 is blocked on a mutex likely owned by task T1:525. -- Masami Hiramatsu (Google) <mhiramat@kernel.org>
On 2025/7/31 10:53, Sergey Senozhatsky wrote: > On (25/07/31 07:11), Masami Hiramatsu (Google) wrote: >> From: Masami Hiramatsu (Google) <mhiramat@kernel.org> >> >> Dump the lock blocker task if it is not hung because if the blocker >> task is also hung, it should be dumped by the detector. This will >> de-duplicate the same stackdumps if the blocker task is also blocked >> by another task (and hung). >> >> Suggested-by: Sergey Senozhatsky <senozhatsky@chromium.org> >> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> >> Acked-by: Lance Yang <lance.yang@linux.dev> > > Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org> > > > Wrote a simple 3 tasks circular lock test (same that I had in real > life). The output looks good: Nice work! The log is now much clearer ;) > > [ 90.985431] [ T140] INFO: task T1:525 blocked for more than 30 seconds. > [ 90.990981] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 > [ 90.996912] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. > [ 91.001617] [ T140] task:T1 state:D stack:0 pid:525 tgid:525 ppid:2 task_flags:0x208040 flags:0x00004000 > [ 91.007564] [ T140] Call Trace: > [ 91.008767] [ T140] <TASK> > [ 91.009872] [ T140] __schedule+0x137f/0x20c0 > [ 91.011606] [ T140] schedule+0xdc/0x280 > [ 91.013115] [ T140] schedule_preempt_disabled+0x10/0x20 > [ 91.015200] [ T140] __mutex_lock+0x721/0x1590 > [ 91.016817] [ T140] ? __mutex_lock+0x500/0x1590 > [ 91.018074] [ T140] mutex_lock+0x81/0x90 > [ 91.019169] [ T140] t1+0x62/0x70 > [ 91.020061] [ T140] kthread+0x583/0x6e0 > [ 91.021140] [ T140] ? drop_caches_sysctl_handler+0x130/0x130 > [ 91.022729] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.023921] [ T140] ret_from_fork+0xc8/0x160 > [ 91.025091] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.026309] [ T140] ret_from_fork_asm+0x11/0x20 > [ 91.027577] [ T140] </TASK> > [ 91.028326] [ T140] INFO: task T1:525 is blocked on a mutex likely owned by task T2:526. > [ 91.030404] [ T140] INFO: task T2:526 blocked for more than 30 seconds. > [ 91.031978] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 > [ 91.034069] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. > [ 91.036215] [ T140] task:T2 state:D stack:0 pid:526 tgid:526 ppid:2 task_flags:0x208040 flags:0x00004000 > [ 91.039233] [ T140] Call Trace: > [ 91.040084] [ T140] <TASK> > [ 91.040810] [ T140] __schedule+0x137f/0x20c0 > [ 91.041954] [ T140] schedule+0xdc/0x280 > [ 91.042984] [ T140] schedule_preempt_disabled+0x10/0x20 > [ 91.044329] [ T140] __mutex_lock+0x721/0x1590 > [ 91.045464] [ T140] ? __mutex_lock+0x500/0x1590 > [ 91.046624] [ T140] mutex_lock+0x81/0x90 > [ 91.047632] [ T140] t2+0x69/0x70 > [ 91.048473] [ T140] kthread+0x583/0x6e0 > [ 91.049453] [ T140] ? drop_pagecache_sb+0x200/0x200 > [ 91.050686] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.051808] [ T140] ret_from_fork+0xc8/0x160 > [ 91.052886] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.054036] [ T140] ret_from_fork_asm+0x11/0x20 > [ 91.055205] [ T140] </TASK> > [ 91.056011] [ T140] INFO: task T2:526 is blocked on a mutex likely owned by task T3:527. > [ 91.058014] [ T140] INFO: task T3:527 blocked for more than 30 seconds. > [ 91.059682] [ T140] Not tainted 6.16.0-next-20250730-00003-g5865c79d6085-dirty #261 > [ 91.061708] [ T140] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. > [ 91.063825] [ T140] task:T3 state:D stack:0 pid:527 tgid:527 ppid:2 task_flags:0x208040 flags:0x00004000 > [ 91.066668] [ T140] Call Trace: > [ 91.067462] [ T140] <TASK> > [ 91.068121] [ T140] __schedule+0x137f/0x20c0 > [ 91.069244] [ T140] schedule+0xdc/0x280 > [ 91.070264] [ T140] schedule_preempt_disabled+0x10/0x20 > [ 91.071723] [ T140] __mutex_lock+0x721/0x1590 > [ 91.072848] [ T140] ? __mutex_lock+0x500/0x1590 > [ 91.074032] [ T140] mutex_lock+0x81/0x90 > [ 91.075054] [ T140] t3+0x23/0x30 > [ 91.075932] [ T140] kthread+0x583/0x6e0 > [ 91.076953] [ T140] ? t2+0x70/0x70 > [ 91.077865] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.078974] [ T140] ret_from_fork+0xc8/0x160 > [ 91.080109] [ T140] ? kthread_blkcg+0xa0/0xa0 > [ 91.081250] [ T140] ret_from_fork_asm+0x11/0x20 > [ 91.082530] [ T140] </TASK> > [ 91.083248] [ T140] INFO: task T3:527 is blocked on a mutex likely owned by task T1:525.
On 2025/7/31 11:15, Lance Yang wrote: > > > On 2025/7/31 10:53, Sergey Senozhatsky wrote: >> On (25/07/31 07:11), Masami Hiramatsu (Google) wrote: >>> From: Masami Hiramatsu (Google) <mhiramat@kernel.org> >>> >>> Dump the lock blocker task if it is not hung because if the blocker >>> task is also hung, it should be dumped by the detector. This will >>> de-duplicate the same stackdumps if the blocker task is also blocked >>> by another task (and hung). >>> >>> Suggested-by: Sergey Senozhatsky <senozhatsky@chromium.org> >>> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> >>> Acked-by: Lance Yang <lance.yang@linux.dev> >> >> Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org> >> >> >> Wrote a simple 3 tasks circular lock test (same that I had in real >> life). The output looks good: > > Nice work! The log is now much clearer ;) Forgot to add: This circular lock test is great. It would be very valuable to have it in samples/hung_task ;) Would you be willing to submit it as a patch? Thanks, Lance > >> >> [ 90.985431] [ T140] INFO: task T1:525 blocked for more than 30 >> seconds. >> [ 90.990981] [ T140] Not tainted 6.16.0- >> next-20250730-00003-g5865c79d6085-dirty #261 >> [ 90.996912] [ T140] "echo 0 > /proc/sys/kernel/ >> hung_task_timeout_secs" disables this message. >> [ 91.001617] [ T140] task:T1 state:D stack:0 >> pid:525 tgid:525 ppid:2 task_flags:0x208040 flags:0x00004000 >> [ 91.007564] [ T140] Call Trace: >> [ 91.008767] [ T140] <TASK> >> [ 91.009872] [ T140] __schedule+0x137f/0x20c0 >> [ 91.011606] [ T140] schedule+0xdc/0x280 >> [ 91.013115] [ T140] schedule_preempt_disabled+0x10/0x20 >> [ 91.015200] [ T140] __mutex_lock+0x721/0x1590 >> [ 91.016817] [ T140] ? __mutex_lock+0x500/0x1590 >> [ 91.018074] [ T140] mutex_lock+0x81/0x90 >> [ 91.019169] [ T140] t1+0x62/0x70 >> [ 91.020061] [ T140] kthread+0x583/0x6e0 >> [ 91.021140] [ T140] ? drop_caches_sysctl_handler+0x130/0x130 >> [ 91.022729] [ T140] ? kthread_blkcg+0xa0/0xa0 >> [ 91.023921] [ T140] ret_from_fork+0xc8/0x160 >> [ 91.025091] [ T140] ? kthread_blkcg+0xa0/0xa0 >> [ 91.026309] [ T140] ret_from_fork_asm+0x11/0x20 >> [ 91.027577] [ T140] </TASK> >> [ 91.028326] [ T140] INFO: task T1:525 is blocked on a mutex >> likely owned by task T2:526. >> [ 91.030404] [ T140] INFO: task T2:526 blocked for more than 30 >> seconds. >> [ 91.031978] [ T140] Not tainted 6.16.0- >> next-20250730-00003-g5865c79d6085-dirty #261 >> [ 91.034069] [ T140] "echo 0 > /proc/sys/kernel/ >> hung_task_timeout_secs" disables this message. >> [ 91.036215] [ T140] task:T2 state:D stack:0 >> pid:526 tgid:526 ppid:2 task_flags:0x208040 flags:0x00004000 >> [ 91.039233] [ T140] Call Trace: >> [ 91.040084] [ T140] <TASK> >> [ 91.040810] [ T140] __schedule+0x137f/0x20c0 >> [ 91.041954] [ T140] schedule+0xdc/0x280 >> [ 91.042984] [ T140] schedule_preempt_disabled+0x10/0x20 >> [ 91.044329] [ T140] __mutex_lock+0x721/0x1590 >> [ 91.045464] [ T140] ? __mutex_lock+0x500/0x1590 >> [ 91.046624] [ T140] mutex_lock+0x81/0x90 >> [ 91.047632] [ T140] t2+0x69/0x70 >> [ 91.048473] [ T140] kthread+0x583/0x6e0 >> [ 91.049453] [ T140] ? drop_pagecache_sb+0x200/0x200 >> [ 91.050686] [ T140] ? kthread_blkcg+0xa0/0xa0 >> [ 91.051808] [ T140] ret_from_fork+0xc8/0x160 >> [ 91.052886] [ T140] ? kthread_blkcg+0xa0/0xa0 >> [ 91.054036] [ T140] ret_from_fork_asm+0x11/0x20 >> [ 91.055205] [ T140] </TASK> >> [ 91.056011] [ T140] INFO: task T2:526 is blocked on a mutex >> likely owned by task T3:527. >> [ 91.058014] [ T140] INFO: task T3:527 blocked for more than 30 >> seconds. >> [ 91.059682] [ T140] Not tainted 6.16.0- >> next-20250730-00003-g5865c79d6085-dirty #261 >> [ 91.061708] [ T140] "echo 0 > /proc/sys/kernel/ >> hung_task_timeout_secs" disables this message. >> [ 91.063825] [ T140] task:T3 state:D stack:0 >> pid:527 tgid:527 ppid:2 task_flags:0x208040 flags:0x00004000 >> [ 91.066668] [ T140] Call Trace: >> [ 91.067462] [ T140] <TASK> >> [ 91.068121] [ T140] __schedule+0x137f/0x20c0 >> [ 91.069244] [ T140] schedule+0xdc/0x280 >> [ 91.070264] [ T140] schedule_preempt_disabled+0x10/0x20 >> [ 91.071723] [ T140] __mutex_lock+0x721/0x1590 >> [ 91.072848] [ T140] ? __mutex_lock+0x500/0x1590 >> [ 91.074032] [ T140] mutex_lock+0x81/0x90 >> [ 91.075054] [ T140] t3+0x23/0x30 >> [ 91.075932] [ T140] kthread+0x583/0x6e0 >> [ 91.076953] [ T140] ? t2+0x70/0x70 >> [ 91.077865] [ T140] ? kthread_blkcg+0xa0/0xa0 >> [ 91.078974] [ T140] ret_from_fork+0xc8/0x160 >> [ 91.080109] [ T140] ? kthread_blkcg+0xa0/0xa0 >> [ 91.081250] [ T140] ret_from_fork_asm+0x11/0x20 >> [ 91.082530] [ T140] </TASK> >> [ 91.083248] [ T140] INFO: task T3:527 is blocked on a mutex >> likely owned by task T1:525. >
On (25/07/31 11:25), Lance Yang wrote: > > > Wrote a simple 3 tasks circular lock test (same that I had in real > > > life). The output looks good: > > > > Nice work! The log is now much clearer ;) > > Forgot to add: > > This circular lock test is great. It would be very valuable to have > it in samples/hung_task ;) > > Would you be willing to submit it as a patch? Well, that particular test is a deadlock, not sure how useful a test that deadlocks is going to be. We probably need some alternative if we want to test backtrace de-dup.
On 2025/7/31 11:37, Sergey Senozhatsky wrote: > On (25/07/31 11:25), Lance Yang wrote: >>>> Wrote a simple 3 tasks circular lock test (same that I had in real >>>> life). The output looks good: >>> >>> Nice work! The log is now much clearer ;) >> >> Forgot to add: >> >> This circular lock test is great. It would be very valuable to have >> it in samples/hung_task ;) >> >> Would you be willing to submit it as a patch? > > Well, that particular test is a deadlock, not sure how useful a test > that deadlocks is going to be. We probably need some alternative if > we want to test backtrace de-dup. Yep, you're right. A permanent deadlock is a no-go for a reusable test. Maybe we need a test like a blocker chain (T1->T2->T3), where T3 holds the lock and just sleeps for a while. Once it wakes up and releases the lock, everything can finish cleanly. Also, I might try to cook up a sample for that, time permitting ;)
© 2016 - 2025 Red Hat, Inc.