kernel/workqueue.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+)
When debugging a workqueue stall, it is useful to be able to trigger a
panic immediately so that the relevant state can be captured.
On some systems it may also be necessary to reboot quickly in order to
escape from a workqueue lockup. For that case, the number of stall
detections required before the panic is triggered can be configured.
Signed-off-by: Sangmoon Kim <sangmoon.kim@samsung.com>
---
kernel/workqueue.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index dfd42c28e404..893310ce23be 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -7406,6 +7406,11 @@ static struct timer_list wq_watchdog_timer;
static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+static bool wq_panic_on_watchdog; /* gate tested by panic_on_wq_watchdog(); false unless set via the parameter */
+module_param_named(panic_on_watchdog, wq_panic_on_watchdog, bool, 0644);
+static unsigned int wq_max_watchdog_to_panic; /* stall-count threshold compared in BUG_ON(); zero-initialized */
+module_param_named(max_watchdog_to_panic, wq_max_watchdog_to_panic, uint, 0644);
+
/*
* Show workers that might prevent the processing of pending work items.
* The only candidates are CPU-bound workers in the running state.
@@ -7457,6 +7462,16 @@ static void show_cpu_pools_hogs(void)
rcu_read_unlock();
}
+static void panic_on_wq_watchdog(void)
+{
+ static unsigned int wq_busy; /* number of counted calls; persists across calls and is never reset here */
+
+ if (wq_panic_on_watchdog) { /* opt-in: nothing is counted while the flag is false */
+ wq_busy++;
+ BUG_ON(wq_busy >= wq_max_watchdog_to_panic); /* threshold 0 (the default) triggers on the first counted call */
+ }
+}
+
static void wq_watchdog_reset_touched(void)
{
int cpu;
@@ -7529,6 +7544,9 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
if (cpu_pool_stall)
show_cpu_pools_hogs();
+ if (lockup_detected)
+ panic_on_wq_watchdog();
+
wq_watchdog_reset_touched();
mod_timer(&wq_watchdog_timer, jiffies + thresh);
}
--
2.34.1
Hello,

On Tue, Jul 30, 2024 at 05:04:28PM +0900, Sangmoon Kim wrote:
> +static bool wq_panic_on_watchdog;
> +module_param_named(panic_on_watchdog, wq_panic_on_watchdog, bool, 0644);
> +static unsigned int wq_max_watchdog_to_panic;
> +module_param_named(max_watchdog_to_panic, wq_max_watchdog_to_panic, uint, 0644);

Can you combine the two into a single parameter? Maybe name it
wq_panic_on_stall? 0 disables. >0 indicates the number of times before
triggering panic, maybe?

Thanks.

--
tejun
> -----Original Message-----
> From: Tejun Heo <htejun@gmail.com> On Behalf Of Tejun Heo
> Sent: Wednesday, July 31, 2024 6:44 AM
>
> Hello,
>
> On Tue, Jul 30, 2024 at 05:04:28PM +0900, Sangmoon Kim wrote:
> > +static bool wq_panic_on_watchdog;
> > +module_param_named(panic_on_watchdog, wq_panic_on_watchdog, bool, 0644);
> > +static unsigned int wq_max_watchdog_to_panic;
> > +module_param_named(max_watchdog_to_panic, wq_max_watchdog_to_panic, uint, 0644);
>
> Can you combine the two into a single parameter? Maybe name it
> wq_panic_on_stall? 0 disables. >0 indicates the number of times before
> triggering panic, maybe?
>
> Thanks.
>
> --
> tejun

Okay. Let me do that.
Thanks for the review.

Sangmoon
© 2016 - 2025 Red Hat, Inc.