kernel/trace/rv/Kconfig | 8 +++ kernel/trace/rv/Makefile | 1 + kernel/trace/rv/reactor_signal.c | 114 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+)
Reactions of the existing reactors are not easily detectable from programs
and also not easily attributable to the triggering task.
This makes it hard to test the monitors themselves programmatically.
The signal reactors allows applications to validate the correct operations
of monitors either by installing custom signal handlers or by forking a
child and waiting for the expected exit code.
For now only SIGBUS is supported, but additional signals can be added.
As the reactors are called from tracepoints they need to be able to run in
any context. To avoid taking any of the looks used during signal delivery
from an invalid context, schedule it through task_work. The delivery will
be delayed, for example when then sleep monitor detects an invalid sleep.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
kernel/trace/rv/Kconfig | 8 +++
kernel/trace/rv/Makefile | 1 +
kernel/trace/rv/reactor_signal.c | 114 +++++++++++++++++++++++++++++++++++++++
3 files changed, 123 insertions(+)
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index 5b4be87ba59d3fa5123a64efa746320c9e8b73b1..dc7b546615a67c811ce94c43bb1db2826cd00c77 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -93,3 +93,11 @@ config RV_REACT_PANIC
help
Enables the panic reactor. The panic reactor emits a printk()
message if an exception is found and panic()s the system.
+
+config RV_REACT_SIGNAL
+ bool "Signal reactor"
+ depends on RV_REACTORS
+ default y
+ help
+ Enables the signal reactor. The signal reactors sends a signal to the
+ task triggering an exception.
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index 42ff5d5aa9dde3ed2964e0b8d4ab7b236f498319..adf56bbd03ffa0d48de1f0d86ca5fcce1628bdba 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
obj-$(CONFIG_RV_REACT_PANIC) += reactor_panic.o
+obj-$(CONFIG_RV_REACT_SIGNAL) += reactor_signal.o
diff --git a/kernel/trace/rv/reactor_signal.c b/kernel/trace/rv/reactor_signal.c
new file mode 100644
index 0000000000000000000000000000000000000000..e123786d94371a240beb63b2d1b2f3044a466404
--- /dev/null
+++ b/kernel/trace/rv/reactor_signal.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Thomas Weißschuh, Linutronix GmbH
+ *
+ * Signal RV reactor:
+ * Prints the exception msg to the kernel message log and sends a signal to the offending task.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/rv.h>
+#include <linux/sched/signal.h>
+#include <linux/task_work.h>
+
+struct rv_signal_work {
+ struct callback_head twork;
+ int signal;
+ char message[256];
+};
+
+static mempool_t *rv_signal_task_work_pool;
+
+static void rv_signal_force_sig(int signal, const char *message)
+{
+ /* The message already contains a subsystem prefix, so use raw printk() */
+ printk(KERN_WARNING "%s", message);
+ pr_warn("Killing PID %d with signal %d", task_pid_nr(current), signal);
+ force_sig(signal);
+}
+
+static void rv_signal_task_work(struct callback_head *cbh)
+{
+ struct rv_signal_work *work = container_of_const(cbh, struct rv_signal_work, twork);
+
+ rv_signal_force_sig(work->signal, work->message);
+
+ mempool_free(work, rv_signal_task_work_pool);
+}
+
+static void rv_reaction_signal(int signal, const char *fmt, va_list args)
+{
+ struct rv_signal_work *work;
+ char message[256];
+
+ work = mempool_alloc_preallocated(rv_signal_task_work_pool);
+ if (!work) {
+ pr_warn_ratelimited("Unable to signal through task_work, sending directly\n");
+ vsnprintf(message, sizeof(message), fmt, args);
+ rv_signal_force_sig(signal, message);
+ return;
+ }
+
+ init_task_work(&work->twork, rv_signal_task_work);
+ work->signal = signal;
+ vsnprintf(work->message, sizeof(work->message), fmt, args);
+
+ /*
+ * The reactor can be called from any context through tracepoints.
+ * To avoid any locking or other operations not usable from all contexts, use TWA_RESUME.
+ * The signal might be delayed, but that shouldn't be an issue.
+ */
+ task_work_add(current, &work->twork, TWA_RESUME);
+}
+
+__printf(1, 2)
+static void rv_reaction_sigbus(const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ rv_reaction_signal(SIGBUS, fmt, args);
+ va_end(args);
+}
+
+static struct rv_reactor rv_sigbus = {
+ .name = "sigbus",
+ .description = "Kill the current task with SIGBUS",
+ .react = rv_reaction_sigbus,
+};
+
+static int __init register_react_signal(void)
+{
+ int ret;
+
+ rv_signal_task_work_pool = mempool_create_kmalloc_pool(num_possible_cpus(),
+ sizeof(struct rv_signal_work));
+ if (!rv_signal_task_work_pool)
+ return -ENOMEM;
+
+ ret = rv_register_reactor(&rv_sigbus);
+ if (ret) {
+ mempool_destroy(rv_signal_task_work_pool);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit unregister_react_signal(void)
+{
+ rv_unregister_reactor(&rv_sigbus);
+ mempool_destroy(rv_signal_task_work_pool);
+}
+
+module_init(register_react_signal);
+module_exit(unregister_react_signal);
+
+MODULE_AUTHOR("Thomas Weißschuh <thomas.weissschuh@linutronix.de>");
+MODULE_DESCRIPTION("signal rv reactor: send a signal if an exception is found.");
---
base-commit: adbdaac63f5024a9a117ef8ce9732a4272fbc931
change-id: 20250901-rv-reactor-signal-b5568e0722a9
Best regards,
--
Thomas Weißschuh <thomas.weissschuh@linutronix.de>
On Fri, 2025-09-19 at 12:49 +0200, Thomas Weißschuh wrote: > Reactions of the existing reactors are not easily detectable from programs > and also not easily attributable to the triggering task. > This makes it hard to test the monitors themselves programmatically. > > The signal reactors allows applications to validate the correct operations > of monitors either by installing custom signal handlers or by forking a > child and waiting for the expected exit code. Thanks, this looks like a really nice addition! > For now only SIGBUS is supported, but additional signals can be added. Curious choice of a default signal, is this specific to your use-case? We may want to add some kind of reactors options in the future to allow configuring this, but I'd say it isn't needed for now. > As the reactors are called from tracepoints they need to be able to run in > any context. To avoid taking any of the looks used during signal delivery You probably meant "locks" > from an invalid context, schedule it through task_work. The delivery will > be delayed, for example when then sleep monitor detects an invalid sleep. > > Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de> > --- > kernel/trace/rv/Kconfig | 8 +++ > kernel/trace/rv/Makefile | 1 + > kernel/trace/rv/reactor_signal.c | 114 > +++++++++++++++++++++++++++++++++++++++ > 3 files changed, 123 insertions(+) > > diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig > index > 5b4be87ba59d3fa5123a64efa746320c9e8b73b1..dc7b546615a67c811ce94c43bb1db2826cd0 > 0c77 100644 > --- a/kernel/trace/rv/Kconfig > +++ b/kernel/trace/rv/Kconfig > @@ -93,3 +93,11 @@ config RV_REACT_PANIC > help > Enables the panic reactor. The panic reactor emits a printk() > message if an exception is found and panic()s the system. > + > +config RV_REACT_SIGNAL > + bool "Signal reactor" > + depends on RV_REACTORS > + default y > + help > + Enables the signal reactor. The signal reactors sends a signal to > the > + task triggering an exception. This assumption is not always correct, imagine a failure in the sleep monitor caused by the wakeup event. The offending task is not current but the wakee. This is a bit tricky because reactors don't have access to that task, just to keep the same implementation between per-cpu and per-task monitors. We may want to revise that, perhaps like Nam is doing with the monitor_target thing [1]. Besides, I'm assuming this reactor is only meaningful for monitors written to validate userspace tasks (signals and TWA_RESUME are probably meaningless/dangerous for kernel threads). I'm fine with that but you should probably mention it here and/or in the reactor itself, since we have also monitors validating kernel behaviour (see the sched collection). [1] - https://lore.kernel.org/lkml/9a1b5a8c449fcb4f1a671016389c1e4fca49a351.1754900299.git.namcao@linutronix.de > diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile > index > 42ff5d5aa9dde3ed2964e0b8d4ab7b236f498319..adf56bbd03ffa0d48de1f0d86ca5fcce1628 > bdba 100644 > --- a/kernel/trace/rv/Makefile > +++ b/kernel/trace/rv/Makefile > @@ -23,3 +23,4 @@ obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o > obj-$(CONFIG_RV_REACTORS) += rv_reactors.o > obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o > obj-$(CONFIG_RV_REACT_PANIC) += reactor_panic.o > +obj-$(CONFIG_RV_REACT_SIGNAL) += reactor_signal.o > diff --git a/kernel/trace/rv/reactor_signal.c > b/kernel/trace/rv/reactor_signal.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..e123786d94371a240beb63b2d1b2f3044a46 > 6404 > --- /dev/null > +++ b/kernel/trace/rv/reactor_signal.c > @@ -0,0 +1,114 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2025 Thomas Weißschuh, Linutronix GmbH > + * > + * Signal RV reactor: > + * Prints the exception msg to the kernel message log and sends a signal to > the offending task. > + */ > + > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > + > +#include <linux/cpumask.h> > +#include <linux/init.h> > +#include <linux/mempool.h> > +#include <linux/module.h> > +#include <linux/printk.h> > +#include <linux/rv.h> > +#include <linux/sched/signal.h> > +#include <linux/task_work.h> > + > +struct rv_signal_work { > + struct callback_head twork; > + int signal; > + char message[256]; > +}; > + > +static mempool_t *rv_signal_task_work_pool; > + > +static void rv_signal_force_sig(int signal, const char *message) > +{ > + /* The message already contains a subsystem prefix, so use raw > printk() */ > + printk(KERN_WARNING "%s", message); > + pr_warn("Killing PID %d with signal %d", task_pid_nr(current), > signal); > + force_sig(signal); > +} > + > +static void rv_signal_task_work(struct callback_head *cbh) > +{ > + struct rv_signal_work *work = container_of_const(cbh, struct > rv_signal_work, twork); > + > + rv_signal_force_sig(work->signal, work->message); > + > + mempool_free(work, rv_signal_task_work_pool); > +} > + > +static void rv_reaction_signal(int signal, const char *fmt, va_list args) > +{ > + struct rv_signal_work *work; > + char message[256]; > + > + work = mempool_alloc_preallocated(rv_signal_task_work_pool); > + if (!work) { > + pr_warn_ratelimited("Unable to signal through task_work, > sending directly\n"); > + vsnprintf(message, sizeof(message), fmt, args); > + rv_signal_force_sig(signal, message); > + return; > + } Why do you use the task_work at all instead of signalling directly? If that's something not safe from a (any) tracepoint because it can sleep you should definitely not call it if allocation fails. > + > + init_task_work(&work->twork, rv_signal_task_work); > + work->signal = signal; > + vsnprintf(work->message, sizeof(work->message), fmt, args); > + > + /* > + * The reactor can be called from any context through tracepoints. > + * To avoid any locking or other operations not usable from all > contexts, use TWA_RESUME. > + * The signal might be delayed, but that shouldn't be an issue. > + */ > + task_work_add(current, &work->twork, TWA_RESUME); > +} > + > +__printf(1, 2) > +static void rv_reaction_sigbus(const char *fmt, ...) > +{ > + va_list args; > + > + va_start(args, fmt); > + rv_reaction_signal(SIGBUS, fmt, args); > + va_end(args); > +} > + > +static struct rv_reactor rv_sigbus = { > + .name = "sigbus", > + .description = "Kill the current task with SIGBUS", > + .react = rv_reaction_sigbus, > +}; Let's be consistent and call this reactor "signal", you can use SIGBUS only in the description until/if we allow different signals. What do you think? Thanks, Gabriele
On Fri, Sep 19, 2025 at 02:26:12PM +0200, Gabriele Monaco wrote: > On Fri, 2025-09-19 at 12:49 +0200, Thomas Weißschuh wrote: > > Reactions of the existing reactors are not easily detectable from programs > > and also not easily attributable to the triggering task. > > This makes it hard to test the monitors themselves programmatically. > > > > The signal reactors allows applications to validate the correct operations > > of monitors either by installing custom signal handlers or by forking a > > child and waiting for the expected exit code. > > Thanks, this looks like a really nice addition! Yeah, this will help us write KUnit or kselftest for the rtapp monitor. > > For now only SIGBUS is supported, but additional signals can be added. > > Curious choice of a default signal, is this specific to your use-case? Any signal should do. Looking through the signal list, I think SIGTRAP is more appropriate. > > +config RV_REACT_SIGNAL > > + bool "Signal reactor" > > + depends on RV_REACTORS > > + default y > > + help > > + Enables the signal reactor. The signal reactors sends a signal to > > the > > + task triggering an exception. > > This assumption is not always correct, imagine a failure in the sleep monitor > caused by the wakeup event. The offending task is not current but the wakee. > > This is a bit tricky because reactors don't have access to that task, just to > keep the same implementation between per-cpu and per-task monitors. Yeah, this one is tricky. We probably need to pass the correct task_struct to reactor, but then I'm not sure how to handle the other monitor types, e.g. per-cpu monitors. I have no alternative to offer, let me give it some thought. > > +static void rv_signal_force_sig(int signal, const char *message) > > +{ > > + /* The message already contains a subsystem prefix, so use raw > > printk() */ > > + printk(KERN_WARNING "%s", message); > > + pr_warn("Killing PID %d with signal %d", task_pid_nr(current), > > signal); RV reactors have to use printk_deferred() instead. See: https://lore.kernel.org/lkml/874k50hqmj.fsf@jogness.linutronix.de/ But I suggest dropping the printk from this reactor. We already have a printk reactor for that. > > + force_sig(signal); > > +} > > + > > +static void rv_signal_task_work(struct callback_head *cbh) > > +{ > > + struct rv_signal_work *work = container_of_const(cbh, struct > > rv_signal_work, twork); > > + > > + rv_signal_force_sig(work->signal, work->message); > > + > > + mempool_free(work, rv_signal_task_work_pool); > > +} > > + > > +static void rv_reaction_signal(int signal, const char *fmt, va_list args) > > +{ > > + struct rv_signal_work *work; > > + char message[256]; > > + > > + work = mempool_alloc_preallocated(rv_signal_task_work_pool); > > + if (!work) { > > + pr_warn_ratelimited("Unable to signal through task_work, > > sending directly\n"); > > + vsnprintf(message, sizeof(message), fmt, args); > > + rv_signal_force_sig(signal, message); > > + return; > > + } > > Why do you use the task_work at all instead of signalling directly? > If that's something not safe from a (any) tracepoint because it can sleep If I remember correctly, sending signals requires a spinlock and therefore may sleep on PREEMPT_RT. > you should definitely not call it if allocation fails. Yep. We probably can get away with not reacting at all if allocation fails, by crafting our tests such that only one reaction happens at a time, and allocation won't fail. Nam
On Mon, Sep 22, 2025 at 06:29:00PM +0200, Nam Cao wrote: > On Fri, Sep 19, 2025 at 02:26:12PM +0200, Gabriele Monaco wrote: > > On Fri, 2025-09-19 at 12:49 +0200, Thomas Weißschuh wrote: (...) > > > For now only SIGBUS is supported, but additional signals can be added. > > > > Curious choice of a default signal, is this specific to your use-case? > > Any signal should do. Looking through the signal list, I think SIGTRAP is > more appropriate. Indeed. Thanks for the hint. > > > +config RV_REACT_SIGNAL > > > + bool "Signal reactor" > > > + depends on RV_REACTORS > > > + default y > > > + help > > > + Enables the signal reactor. The signal reactors sends a signal to > > > the > > > + task triggering an exception. > > > > This assumption is not always correct, imagine a failure in the sleep monitor > > caused by the wakeup event. The offending task is not current but the wakee. > > > > This is a bit tricky because reactors don't have access to that task, just to > > keep the same implementation between per-cpu and per-task monitors. > > Yeah, this one is tricky. We probably need to pass the correct task_struct > to reactor, but then I'm not sure how to handle the other monitor types, > e.g. per-cpu monitors. > > I have no alternative to offer, let me give it some thought. Thanks. > > > +static void rv_signal_force_sig(int signal, const char *message) > > > +{ > > > + /* The message already contains a subsystem prefix, so use raw > > > printk() */ > > > + printk(KERN_WARNING "%s", message); > > > + pr_warn("Killing PID %d with signal %d", task_pid_nr(current), > > > signal); > > RV reactors have to use printk_deferred() instead. See: > https://lore.kernel.org/lkml/874k50hqmj.fsf@jogness.linutronix.de/ When the direct-call path is removed, this will only be used through task_work. For that direct printk() should be fine, right? > But I suggest dropping the printk from this reactor. We already have a > printk reactor for that. Works for me. (...) > > Why do you use the task_work at all instead of signalling directly? > > If that's something not safe from a (any) tracepoint because it can sleep > > If I remember correctly, sending signals requires a spinlock and therefore > may sleep on PREEMPT_RT. > > > you should definitely not call it if allocation fails. > > Yep. > > We probably can get away with not reacting at all if allocation fails, by > crafting our tests such that only one reaction happens at a time, and > allocation won't fail. It would be nice if the reactor works without having to worry about its implementation in the testcases or even general users. In 6.18 we will get kmalloc_nolock() which is meant to be usable from tracepoint context. My plan is to use that for the next revision. Thomas
On Mon, 2025-09-22 at 18:29 +0200, Nam Cao wrote: > On Fri, Sep 19, 2025 at 02:26:12PM +0200, Gabriele Monaco wrote: > > On Fri, 2025-09-19 at 12:49 +0200, Thomas Weißschuh wrote: > > > +static void rv_reaction_signal(int signal, const char *fmt, va_list args) > > > +{ > > > + struct rv_signal_work *work; > > > + char message[256]; > > > + > > > + work = mempool_alloc_preallocated(rv_signal_task_work_pool); > > > + if (!work) { > > > + pr_warn_ratelimited("Unable to signal through task_work, > > > sending directly\n"); > > > + vsnprintf(message, sizeof(message), fmt, args); > > > + rv_signal_force_sig(signal, message); > > > + return; > > > + } > > > > Why do you use the task_work at all instead of signalling directly? > > If that's something not safe from a (any) tracepoint because it can sleep > > If I remember correctly, sending signals requires a spinlock and therefore > may sleep on PREEMPT_RT. Yeah that's what I quickly glanced at. Which seems to be the case also for mempool_alloc_preallocated by the way, so I'm not sure that's safer than signalling directly on PREEMPT_RT. Thomas, did you test your reactor on PREEMPT_RT? I'd expect a few fat warnings when this is called from sched tracepoints. Unless you're lucky and never get contention. Lockdep (CONFIG_PROVE_LOCKING) may help here. Thanks, Gabriele > > > you should definitely not call it if allocation fails. > > Yep. > > We probably can get away with not reacting at all if allocation fails, by > crafting our tests such that only one reaction happens at a time, and > allocation won't fail. > > Nam
Hi Gabriele and Nam, Sep 23, 2025 09:43:05 Gabriele Monaco <gmonaco@redhat.com>: > On Mon, 2025-09-22 at 18:29 +0200, Nam Cao wrote: >> On Fri, Sep 19, 2025 at 02:26:12PM +0200, Gabriele Monaco wrote: >>> On Fri, 2025-09-19 at 12:49 +0200, Thomas Weißschuh wrote: >>>> +static void rv_reaction_signal(int signal, const char *fmt, va_list args) >>>> +{ >>>> + struct rv_signal_work *work; >>>> + char message[256]; >>>> + >>>> + work = mempool_alloc_preallocated(rv_signal_task_work_pool); >>>> + if (!work) { >>>> + pr_warn_ratelimited("Unable to signal through task_work, >>>> sending directly\n"); >>>> + vsnprintf(message, sizeof(message), fmt, args); >>>> + rv_signal_force_sig(signal, message); >>>> + return; >>>> + } >>> >>> Why do you use the task_work at all instead of signalling directly? >>> If that's something not safe from a (any) tracepoint because it can sleep >> >> If I remember correctly, sending signals requires a spinlock and therefore >> may sleep on PREEMPT_RT. > > Yeah that's what I quickly glanced at. Which seems to be the case also for > mempool_alloc_preallocated by the way, so I'm not sure that's safer than > signalling directly on PREEMPT_RT. > > Thomas, did you test your reactor on PREEMPT_RT? I'd expect a few fat warnings > when this is called from sched tracepoints. Unless you're lucky and never get > contention. Lockdep (CONFIG_PROVE_LOCKING) may help here. I trusted the documentation, which promised not to sleep. I'll rework it for v2. > Thanks, > Gabriele > >> >>> you should definitely not call it if allocation fails. >> >> Yep. Ack. >> >> We probably can get away with not reacting at all if allocation fails, by >> crafting our tests such that only one reaction happens at a time, and >> allocation won't fail. Ack. I am wondering if it would make sense to add a new tracepoint that fires in addition of the reactors. That would allow multiple simultaneous consumers and also bespoke handlers in userspace. Thomas
© 2016 - 2025 Red Hat, Inc.