.../admin-guide/kernel-parameters.txt |  5 ++
kernel/irq/handle.c                   | 48 ++++++++++++++++++-
2 files changed, 52 insertions(+), 1 deletion(-)
Add a mechanism to detect and warn about long-running IRQ handlers
that exceed a user-defined duration threshold in microseconds.
The feature is enabled via the kernel boot parameter:
"irqhandler.duration_warn_us=<threshold_in_us>"
For example, passing irqhandler.duration_warn_us=1000 will warn if an
IRQ handler takes more than 1000 microseconds.
The implementation uses local_clock() to measure how long each IRQ
handler runs. When the threshold is exceeded, a ratelimited warning is
printed:
"[CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us"
Signed-off-by: Wladislav Wiebe <wladislav.wiebe@nokia.com>
---
V1 -> V2: refactor to use local_clock() instead of jiffies and replace
Kconfig knobs by a new command-line parameter.
V1 link: https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@nokia.com/
---
.../admin-guide/kernel-parameters.txt | 5 ++
kernel/irq/handle.c | 48 ++++++++++++++++++-
2 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..fa89f21ea1e6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2543,6 +2543,11 @@
for it. Intended to get systems with badly broken
firmware running.
+ irqhandler.duration_warn_us= [KNL,EARLY]
+ Warn if an IRQ handler exceeds the specified duration
+ threshold in microseconds. Useful for identifying
+ long-running IRQs in the system.
+
irqpoll [HW]
When an interrupt is not handled search all handlers
for it. Also check all handlers each timer
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9489f93b3db3..eab8fdfab8d8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
wake_up_process(action->thread);
}
+/* Enabled only when a valid warning threshold was given on the command line. */
+static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled);
+/* Warning threshold in microseconds; written once by the parser below. */
+static u64 irqhandler_duration_threshold_us __ro_after_init;
+
+/*
+ * Parse "irqhandler.duration_warn_us=<threshold_in_us>".
+ *
+ * A missing value is ignored and returns 0. A value that kstrtoul()
+ * rejects returns its error code, and an explicit 0 is reported and
+ * returns -EINVAL. Any other value becomes the threshold and switches
+ * the duration check on.
+ */
+static int __init irqhandler_duration_check_setup(char *arg)
+{
+	unsigned long threshold_us;
+	int err;
+
+	if (!arg)
+		return 0;
+
+	err = kstrtoul(arg, 0, &threshold_us);
+	if (err)
+		return err;
+
+	/* The value is unsigned, so "not greater than 0" means exactly 0. */
+	if (!threshold_us) {
+		pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", threshold_us);
+		return -EINVAL;
+	}
+
+	irqhandler_duration_threshold_us = threshold_us;
+	static_branch_enable(&irqhandler_duration_check_enabled);
+
+	return 0;
+}
+early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);
+
+/*
+ * Emit a ratelimited warning when the handler of @action ran for longer
+ * than irqhandler_duration_threshold_us.
+ *
+ * @ts_start:	local_clock() value sampled right before the handler ran
+ * @irq:	interrupt number, used in the message only
+ * @action:	action whose handler was timed; only read here, hence const
+ */
+static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
+					     const struct irqaction *action)
+{
+	/*
+	 * Intentional approximation: shifting by 10 divides the nanosecond
+	 * delta by 1024 rather than 1000, so the microsecond value comes
+	 * out roughly 2% low.
+	 */
+	u64 delta_us = (local_clock() - ts_start) >> 10;
+
+	if (unlikely(delta_us > irqhandler_duration_threshold_us)) {
+		pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",
+				    smp_processor_id(), irq, action->handler, delta_us);
+	}
+}
+
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval = IRQ_NONE;
@@ -146,6 +184,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
for_each_action_of_desc(desc, action) {
irqreturn_t res;
+ u64 ts_start;
/*
* If this IRQ would be threaded under force_irqthreads, mark it so.
@@ -155,7 +194,14 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
lockdep_hardirq_threaded();
trace_irq_handler_entry(irq, action);
- res = action->handler(irq, action->dev_id);
+
+ if (static_branch_unlikely(&irqhandler_duration_check_enabled)) {
+ ts_start = local_clock();
+ res = action->handler(irq, action->dev_id);
+ irqhandler_duration_check(ts_start, irq, action);
+ } else
+ res = action->handler(irq, action->dev_id);
+
trace_irq_handler_exit(irq, action, res);
if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
--
2.39.3.dirty
On 14. 07. 25, 10:41, Wladislav Wiebe wrote: > This patch adds a mechanism to detect and warn about long-running IRQ > handlers exceeding a user-defined duration threshold in microseconds. > > The feature is enabled via the kernel boot parameter: > "irqhandler.duration_warn_us=<threshold_in_us>" > > For example, passing irqhandler.duration_warn_us=1000 will warn if an > IRQ handler takes more than 1000 microseconds. > > Implementation uses local_clock() to measure the execution duration of > IRQ handlers. When the threshold is exceeded, a ratelimited warning is > printed: > > "[CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us" > > Signed-off-by: Wladislav Wiebe <wladislav.wiebe@nokia.com> > --- > V1 -> V2: refactor to use local_clock() instead of jiffies and replace > Kconfig knobs by a new command-line parameter. > V1 link: https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@nokia.com/ > --- > .../admin-guide/kernel-parameters.txt | 5 ++ > kernel/irq/handle.c | 48 ++++++++++++++++++- > 2 files changed, 52 insertions(+), 1 deletion(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index f1f2c0874da9..fa89f21ea1e6 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -2543,6 +2543,11 @@ > for it. Intended to get systems with badly broken > firmware running. > > + irqhandler.duration_warn_us= [KNL,EARLY] > + Warn if an IRQ handler exceeds the specified duration > + threshold in microseconds. Useful for identifying > + long-running IRQs in the system. > + > irqpoll [HW] > When an interrupt is not handled search all handlers > for it. 
Also check all handlers each timer > diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c > index 9489f93b3db3..eab8fdfab8d8 100644 > --- a/kernel/irq/handle.c > +++ b/kernel/irq/handle.c > @@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) > wake_up_process(action->thread); > } > > +static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled); > +static u64 irqhandler_duration_threshold_us __ro_after_init; > + > +static int __init irqhandler_duration_check_setup(char *arg) > +{ > + unsigned long val; > + int ret; > + > + if (!arg) > + return 0; > + > + ret = kstrtoul(arg, 0, &val); > + if (ret) > + return ret; > + > + if (val > 0) { > + irqhandler_duration_threshold_us = val; > + static_branch_enable(&irqhandler_duration_check_enabled); > + } else { > + pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val); > + return -EINVAL; Perhaps invert the condition and drop the "else {}"? > + } > + > + return 0; > +} > +early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup); > + > +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq, > + struct irqaction *action) Can be const. > +{ > + u64 delta_us = (local_clock() - ts_start) >> 10; > + > + if (unlikely(delta_us > irqhandler_duration_threshold_us)) { > + pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n", s/%d/%u/. Do you mean "of IRQ[...]"? > + smp_processor_id(), irq, action->handler, delta_us); > + } > +} > + > irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) > { > irqreturn_t retval = IRQ_NONE; thanks, -- js suse labs
On 22/07/2025 10:21, Jiri Slaby wrote: > > On 14. 07. 25, 10:41, Wladislav Wiebe wrote: >> This patch adds a mechanism to detect and warn about long-running IRQ >> handlers exceeding a user-defined duration threshold in microseconds. >> >> The feature is enabled via the kernel boot parameter: >> "irqhandler.duration_warn_us=<threshold_in_us>" >> >> For example, passing irqhandler.duration_warn_us=1000 will warn if an >> IRQ handler takes more than 1000 microseconds. >> >> Implementation uses local_clock() to measure the execution duration of >> IRQ handlers. When the threshold is exceeded, a ratelimited warning is >> printed: >> >> "[CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us" >> >> Signed-off-by: Wladislav Wiebe <wladislav.wiebe@nokia.com> >> --- >> V1 -> V2: refactor to use local_clock() instead of jiffies and replace >> Kconfig knobs by a new command-line parameter. >> V1 link: https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@nokia.com/ >> --- >> .../admin-guide/kernel-parameters.txt | 5 ++ >> kernel/irq/handle.c | 48 ++++++++++++++++++- >> 2 files changed, 52 insertions(+), 1 deletion(-) >> >> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt >> index f1f2c0874da9..fa89f21ea1e6 100644 >> --- a/Documentation/admin-guide/kernel-parameters.txt >> +++ b/Documentation/admin-guide/kernel-parameters.txt >> @@ -2543,6 +2543,11 @@ >> for it. Intended to get systems with badly broken >> firmware running. >> >> + irqhandler.duration_warn_us= [KNL,EARLY] >> + Warn if an IRQ handler exceeds the specified duration >> + threshold in microseconds. Useful for identifying >> + long-running IRQs in the system. >> + >> irqpoll [HW] >> When an interrupt is not handled search all handlers >> for it. 
Also check all handlers each timer >> diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c >> index 9489f93b3db3..eab8fdfab8d8 100644 >> --- a/kernel/irq/handle.c >> +++ b/kernel/irq/handle.c >> @@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) >> wake_up_process(action->thread); >> } >> >> +static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled); >> +static u64 irqhandler_duration_threshold_us __ro_after_init; >> + >> +static int __init irqhandler_duration_check_setup(char *arg) >> +{ >> + unsigned long val; >> + int ret; >> + >> + if (!arg) >> + return 0; >> + >> + ret = kstrtoul(arg, 0, &val); >> + if (ret) >> + return ret; >> + >> + if (val > 0) { >> + irqhandler_duration_threshold_us = val; >> + static_branch_enable(&irqhandler_duration_check_enabled); >> + } else { >> + pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val); >> + return -EINVAL; > > Perhaps invert the condition and drop the "else {}"? > >> + } >> + >> + return 0; >> +} >> +early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup); >> + >> +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq, >> + struct irqaction *action) > > Can be const. > >> +{ >> + u64 delta_us = (local_clock() - ts_start) >> 10; >> + >> + if (unlikely(delta_us > irqhandler_duration_threshold_us)) { >> + pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n", > > s/%d/%u/. > Do you mean "of IRQ[...]"? > >> + smp_processor_id(), irq, action->handler, delta_us); >> + } >> +} >> + >> irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) >> { >> irqreturn_t retval = IRQ_NONE; Thanks for the comments, I've addressed them in v3: https://lore.kernel.org/lkml/20250723182836.1177-1-wladislav.wiebe@nokia.com/ - W.W.
On Mon, Jul 14 2025 at 10:41, Wladislav Wiebe wrote: > This patch adds a mechanism to detect and warn about long-running IRQ # git grep 'This patch' Documentation/process/ Also please read: https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#changelog > +static int __init irqhandler_duration_check_setup(char *arg) > +{ > + unsigned long val; > + int ret; > + > + if (!arg) > + return 0; > + > + ret = kstrtoul(arg, 0, &val); > + if (ret) > + return ret; > + > + if (val > 0) { > + irqhandler_duration_threshold_us = val; > + static_branch_enable(&irqhandler_duration_check_enabled); > + } else { > + pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val); > + return -EINVAL; > + } > + > + return 0; > +} > +early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup); Why early_param? Nothing cares about this during early boot. > +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq, > + struct irqaction *action) > +{ > + u64 delta_us = (local_clock() - ts_start) >> 10; Lacks a comment that this is an intentional approximation. > + if (unlikely(delta_us > irqhandler_duration_threshold_us)) { > + pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n", > + smp_processor_id(), irq, action->handler, delta_us); Please align the arguments in the second line properly. https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#line-breaks > + } > +} > + > irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) > { > irqreturn_t retval = IRQ_NONE; > @@ -146,6 +184,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) > > for_each_action_of_desc(desc, action) { > irqreturn_t res; > + u64 ts_start; This wants to be in the if() branch where it is actually used. > /* > * If this IRQ would be threaded under force_irqthreads, mark it so. 
> @@ -155,7 +194,14 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) > lockdep_hardirq_threaded(); > > trace_irq_handler_entry(irq, action); > - res = action->handler(irq, action->dev_id); > + > + if (static_branch_unlikely(&irqhandler_duration_check_enabled)) { > + ts_start = local_clock(); > + res = action->handler(irq, action->dev_id); > + irqhandler_duration_check(ts_start, irq, action); > + } else > + res = action->handler(irq, action->dev_id); > + Even if not required by C, the else clause wants brackets too for symmetry. if (foo) bar(); else baz(); parses perfectly fine. if (foo) { do_stuff(); bar(); } else baz(); is asymmetrical and disturbs the reading flow, which is pattern based. The extra brackets just make it easier to parse: if (foo) { do_stuff(); bar(); } else { baz(); } See? Thanks, tglx
On 18/07/2025 22:53, Thomas Gleixner wrote: > On Mon, Jul 14 2025 at 10:41, Wladislav Wiebe wrote: >> This patch adds a mechanism to detect and warn about long-running IRQ > # git grep 'This patch' Documentation/process/ > > Also please read: > > https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#changelog > >> +static int __init irqhandler_duration_check_setup(char *arg) >> +{ >> + unsigned long val; >> + int ret; >> + >> + if (!arg) >> + return 0; >> + >> + ret = kstrtoul(arg, 0, &val); >> + if (ret) >> + return ret; >> + >> + if (val > 0) { >> + irqhandler_duration_threshold_us = val; >> + static_branch_enable(&irqhandler_duration_check_enabled); >> + } else { >> + pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val); >> + return -EINVAL; >> + } >> + >> + return 0; >> +} >> +early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup); > Why early_param? Nothing cares about this during early boot. > >> +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq, >> + struct irqaction *action) >> +{ >> + u64 delta_us = (local_clock() - ts_start) >> 10; > Lacks a comment that this is an intentional approximation. > >> + if (unlikely(delta_us > irqhandler_duration_threshold_us)) { >> + pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n", >> + smp_processor_id(), irq, action->handler, delta_us); > Please align the arguments in the second line properly. > > https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#line-breaks > >> + } >> +} >> + >> irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) >> { >> irqreturn_t retval = IRQ_NONE; >> @@ -146,6 +184,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) >> >> for_each_action_of_desc(desc, action) { >> irqreturn_t res; >> + u64 ts_start; > This wants to be in the if() branch where it is actually used. > >> /* >> * If this IRQ would be threaded under force_irqthreads, mark it so. 
>> @@ -155,7 +194,14 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) >> lockdep_hardirq_threaded(); >> >> trace_irq_handler_entry(irq, action); >> - res = action->handler(irq, action->dev_id); >> + >> + if (static_branch_unlikely(&irqhandler_duration_check_enabled)) { >> + ts_start = local_clock(); >> + res = action->handler(irq, action->dev_id); >> + irqhandler_duration_check(ts_start, irq, action); >> + } else >> + res = action->handler(irq, action->dev_id); >> + > Even if not required by C, the else clause wants brackets too for > symmetry. > > if (foo) > bar(); > else > baz(); > > parses perfectly fine. > > if (foo) { > do_stuff(); > bar(); > } else > baz(); > > is asymmetrical and disturbs the reading flow, which is pattern > based. The extra brackets just make it easier to parse: > > if (foo) { > do_stuff(); > bar(); > } else { > baz(); > } > > See? > > Thanks, > > tglx Thanks for further comments, I've addressed them in v3: https://lore.kernel.org/lkml/20250723182836.1177-1-wladislav.wiebe@nokia.com/ - W.W.
© 2016 - 2025 Red Hat, Inc.