Add a monitor for checking that real-time tasks do not go to sleep in a
manner that may cause undesirable latency.
Also change
RV depends on TRACING
to
RV select TRACING
to avoid the following recursive dependency:
error: recursive dependency detected!
symbol TRACING is selected by PREEMPTIRQ_TRACEPOINTS
symbol PREEMPTIRQ_TRACEPOINTS depends on TRACE_IRQFLAGS
symbol TRACE_IRQFLAGS is selected by RV_MON_SLEEP
symbol RV_MON_SLEEP depends on RV
symbol RV depends on TRACING
Reviewed-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Nam Cao <namcao@linutronix.de>
---
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Waiman Long <longman@redhat.com>
---
kernel/trace/rv/Kconfig | 3 +-
kernel/trace/rv/Makefile | 1 +
kernel/trace/rv/monitors/sleep/Kconfig | 13 ++
kernel/trace/rv/monitors/sleep/sleep.c | 224 ++++++++++++++++++
kernel/trace/rv/monitors/sleep/sleep.h | 232 +++++++++++++++++++
kernel/trace/rv/monitors/sleep/sleep_trace.h | 14 ++
kernel/trace/rv/rv_trace.h | 1 +
tools/verification/models/rtapp/sleep.ltl | 20 ++
8 files changed, 507 insertions(+), 1 deletion(-)
create mode 100644 kernel/trace/rv/monitors/sleep/Kconfig
create mode 100644 kernel/trace/rv/monitors/sleep/sleep.c
create mode 100644 kernel/trace/rv/monitors/sleep/sleep.h
create mode 100644 kernel/trace/rv/monitors/sleep/sleep_trace.h
create mode 100644 tools/verification/models/rtapp/sleep.ltl
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index 6f86d8501e87..942d57575e67 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -20,7 +20,7 @@ config RV_LTL_MONITOR
menuconfig RV
bool "Runtime Verification"
- depends on TRACING
+ select TRACING
help
Enable the kernel runtime verification infrastructure. RV is a
lightweight (yet rigorous) method that complements classical
@@ -43,6 +43,7 @@ source "kernel/trace/rv/monitors/snep/Kconfig"
source "kernel/trace/rv/monitors/sncid/Kconfig"
source "kernel/trace/rv/monitors/rtapp/Kconfig"
source "kernel/trace/rv/monitors/pagefault/Kconfig"
+source "kernel/trace/rv/monitors/sleep/Kconfig"
# Add new monitors here
config RV_REACTORS
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index 353ecf939d0e..13ec2944c665 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_RV_MON_SNEP) += monitors/snep/snep.o
obj-$(CONFIG_RV_MON_SNCID) += monitors/sncid/sncid.o
obj-$(CONFIG_RV_MON_RTAPP) += monitors/rtapp/rtapp.o
obj-$(CONFIG_RV_MON_PAGEFAULT) += monitors/pagefault/pagefault.o
+obj-$(CONFIG_RV_MON_SLEEP) += monitors/sleep/sleep.o
# Add new monitors here
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
diff --git a/kernel/trace/rv/monitors/sleep/Kconfig b/kernel/trace/rv/monitors/sleep/Kconfig
new file mode 100644
index 000000000000..d00aa1aae069
--- /dev/null
+++ b/kernel/trace/rv/monitors/sleep/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_SLEEP
+ depends on RV
+ select RV_LTL_MONITOR
+ depends on HAVE_SYSCALL_TRACEPOINTS
+ depends on RV_MON_RTAPP
+ select TRACE_IRQFLAGS
+ default y
+ select LTL_MON_EVENTS_ID
+ bool "sleep monitor"
+ help
+ Monitor that real-time tasks do not sleep in a manner that may cause undesirable latency.
diff --git a/kernel/trace/rv/monitors/sleep/sleep.c b/kernel/trace/rv/monitors/sleep/sleep.c
new file mode 100644
index 000000000000..cf4cccd49c35
--- /dev/null
+++ b/kernel/trace/rv/monitors/sleep/sleep.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rv.h>
+#include <linux/sched/deadline.h>
+#include <linux/sched/rt.h>
+#include <rv/instrumentation.h>
+
+#define MODULE_NAME "sleep"
+
+#include <trace/events/syscalls.h>
+#include <trace/events/sched.h>
+#include <trace/events/lock.h>
+#include <uapi/linux/futex.h>
+#include <rv_trace.h>
+#include <monitors/rtapp/rtapp.h>
+
+#include "sleep.h"
+#include <rv/ltl_monitor.h>
+
+static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon)
+{
+ /*
+ * This includes "actual" real-time tasks and also PI-boosted tasks. A task being PI-boosted
+ * means it is blocking an "actual" real-task, therefore it should also obey the monitor's
+ * rule, otherwise the "actual" real-task may be delayed.
+ */
+ ltl_atom_set(mon, LTL_RT, rt_or_dl_task(task));
+}
+
+static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bool task_creation)
+{
+ ltl_atom_set(mon, LTL_SLEEP, false);
+ ltl_atom_set(mon, LTL_WAKE, false);
+ ltl_atom_set(mon, LTL_WOKEN_BY_HARDIRQ, false);
+ ltl_atom_set(mon, LTL_WOKEN_BY_NMI, false);
+ ltl_atom_set(mon, LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO, false);
+
+ if (task_creation) {
+ ltl_atom_set(mon, LTL_KTHREAD_SHOULD_STOP, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
+ ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false);
+ ltl_atom_set(mon, LTL_PI_FUTEX, false);
+ ltl_atom_set(mon, LTL_BLOCK_ON_RT_MUTEX, false);
+ }
+
+ if (task->flags & PF_KTHREAD) {
+ ltl_atom_set(mon, LTL_KERNEL_THREAD, true);
+
+ /* kernel tasks do not do syscall */
+ ltl_atom_set(mon, LTL_PI_FUTEX, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
+ ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false);
+
+ if (strstarts(task->comm, "migration/"))
+ ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, true);
+ else
+ ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, false);
+
+ if (strstarts(task->comm, "rcu"))
+ ltl_atom_set(mon, LTL_TASK_IS_RCU, true);
+ else
+ ltl_atom_set(mon, LTL_TASK_IS_RCU, false);
+ } else {
+ ltl_atom_set(mon, LTL_KTHREAD_SHOULD_STOP, false);
+ ltl_atom_set(mon, LTL_KERNEL_THREAD, false);
+ ltl_atom_set(mon, LTL_TASK_IS_RCU, false);
+ ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, false);
+ }
+
+}
+
+static void handle_sched_set_state(void *data, struct task_struct *task, int state)
+{
+ if (state & TASK_INTERRUPTIBLE)
+ ltl_atom_pulse(task, LTL_SLEEP, true);
+}
+
+static void handle_sched_wakeup(void *data, struct task_struct *task)
+{
+ ltl_atom_pulse(task, LTL_WAKE, true);
+}
+
+static void handle_sched_waking(void *data, struct task_struct *task)
+{
+ if (this_cpu_read(hardirq_context)) {
+ ltl_atom_pulse(task, LTL_WOKEN_BY_HARDIRQ, true);
+ } else if (in_task()) {
+ if (current->prio <= task->prio)
+ ltl_atom_pulse(task, LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO, true);
+ } else if (in_nmi()) {
+ ltl_atom_pulse(task, LTL_WOKEN_BY_NMI, true);
+ }
+}
+
+static void handle_block_on_rt_mutex_begin(void *data, struct task_struct *task)
+{
+ ltl_atom_update(task, LTL_BLOCK_ON_RT_MUTEX, true);
+}
+
+static void handle_block_on_rt_mutex_end(void *data, struct task_struct *task)
+{
+ ltl_atom_update(task, LTL_BLOCK_ON_RT_MUTEX, false);
+}
+
+static void handle_sys_enter(void *data, struct pt_regs *regs, long id)
+{
+ struct ltl_monitor *mon;
+ unsigned long args[6];
+ int op, cmd;
+
+ mon = ltl_get_monitor(current);
+
+ switch (id) {
+ case __NR_clock_nanosleep:
+#ifdef __NR_clock_nanosleep_time64
+ case __NR_clock_nanosleep_time64:
+#endif
+ syscall_get_arguments(current, regs, args);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, args[0] == CLOCK_MONOTONIC);
+ ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, args[1] == TIMER_ABSTIME);
+ ltl_atom_update(current, LTL_CLOCK_NANOSLEEP, true);
+ break;
+
+ case __NR_futex:
+#ifdef __NR_futex_time64
+ case __NR_futex_time64:
+#endif
+ syscall_get_arguments(current, regs, args);
+ op = args[1];
+ cmd = op & FUTEX_CMD_MASK;
+
+ switch (cmd) {
+ case FUTEX_LOCK_PI:
+ case FUTEX_LOCK_PI2:
+ case FUTEX_WAIT_REQUEUE_PI:
+ ltl_atom_update(current, LTL_PI_FUTEX, true);
+ }
+ break;
+ }
+}
+
+static void handle_sys_exit(void *data, struct pt_regs *regs, long ret)
+{
+ struct ltl_monitor *mon = ltl_get_monitor(current);
+
+ ltl_atom_set(mon, LTL_PI_FUTEX, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
+ ltl_atom_update(current, LTL_CLOCK_NANOSLEEP, false);
+}
+
+static void handle_kthread_stop(void *data, struct task_struct *task)
+{
+ /* FIXME: this could race with other tracepoint handlers */
+ ltl_atom_update(task, LTL_KTHREAD_SHOULD_STOP, true);
+}
+
+static int enable_sleep(void)
+{
+ int retval;
+
+ retval = ltl_monitor_init();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("rtapp_sleep", sched_waking, handle_sched_waking);
+ rv_attach_trace_probe("rtapp_sleep", sched_wakeup, handle_sched_wakeup);
+ rv_attach_trace_probe("rtapp_sleep", sched_set_state_tp, handle_sched_set_state);
+ rv_attach_trace_probe("rtapp_sleep", block_on_rt_mutex_begin_tp,
+ handle_block_on_rt_mutex_begin);
+ rv_attach_trace_probe("rtapp_sleep", block_on_rt_mutex_end_tp,
+ handle_block_on_rt_mutex_end);
+ rv_attach_trace_probe("rtapp_sleep", sched_kthread_stop, handle_kthread_stop);
+ rv_attach_trace_probe("rtapp_sleep", sys_enter, handle_sys_enter);
+ rv_attach_trace_probe("rtapp_sleep", sys_exit, handle_sys_exit);
+ return 0;
+}
+
+static void disable_sleep(void)
+{
+ rv_detach_trace_probe("rtapp_sleep", sched_waking, handle_sched_waking);
+ rv_detach_trace_probe("rtapp_sleep", sched_wakeup, handle_sched_wakeup);
+ rv_detach_trace_probe("rtapp_sleep", sched_set_state_tp, handle_sched_set_state);
+ rv_detach_trace_probe("rtapp_sleep", block_on_rt_mutex_begin_tp,
+ handle_block_on_rt_mutex_begin);
+ rv_detach_trace_probe("rtapp_sleep", block_on_rt_mutex_end_tp,
+ handle_block_on_rt_mutex_end);
+ rv_detach_trace_probe("rtapp_sleep", sched_kthread_stop, handle_kthread_stop);
+ rv_detach_trace_probe("rtapp_sleep", sys_enter, handle_sys_enter);
+ rv_detach_trace_probe("rtapp_sleep", sys_exit, handle_sys_exit);
+
+ ltl_monitor_destroy();
+}
+
+static struct rv_monitor rv_sleep = {
+ .name = "sleep",
+ .description = "Monitor that RT tasks do not undesirably sleep",
+ .enable = enable_sleep,
+ .disable = disable_sleep,
+};
+
+static int __init register_sleep(void)
+{
+ return rv_register_monitor(&rv_sleep, &rv_rtapp);
+}
+
+static void __exit unregister_sleep(void)
+{
+ rv_unregister_monitor(&rv_sleep);
+}
+
+module_init(register_sleep);
+module_exit(unregister_sleep);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nam Cao <namcao@linutronix.de>");
+MODULE_DESCRIPTION("sleep: Monitor that RT tasks do not undesirably sleep");
diff --git a/kernel/trace/rv/monitors/sleep/sleep.h b/kernel/trace/rv/monitors/sleep/sleep.h
new file mode 100644
index 000000000000..166c2ffc087e
--- /dev/null
+++ b/kernel/trace/rv/monitors/sleep/sleep.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/rv.h>
+
+#define MONITOR_NAME sleep
+
+enum ltl_atom {
+ LTL_BLOCK_ON_RT_MUTEX,
+ LTL_CLOCK_NANOSLEEP,
+ LTL_KERNEL_THREAD,
+ LTL_KTHREAD_SHOULD_STOP,
+ LTL_NANOSLEEP_CLOCK_MONOTONIC,
+ LTL_NANOSLEEP_TIMER_ABSTIME,
+ LTL_PI_FUTEX,
+ LTL_RT,
+ LTL_SLEEP,
+ LTL_TASK_IS_MIGRATION,
+ LTL_TASK_IS_RCU,
+ LTL_WAKE,
+ LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
+ LTL_WOKEN_BY_HARDIRQ,
+ LTL_WOKEN_BY_NMI,
+ LTL_NUM_ATOM
+};
+static_assert(LTL_NUM_ATOM <= RV_MAX_LTL_ATOM);
+
+static const char *ltl_atom_str(enum ltl_atom atom)
+{
+ static const char *const names[] = {
+ "bl_on_rt_mu",
+ "cl_na",
+ "ker_th",
+ "kth_sh_st",
+ "na_cl_mo",
+ "na_ti_ab",
+ "pi_fu",
+ "rt",
+ "sl",
+ "ta_mi",
+ "ta_rc",
+ "wak",
+ "wo_eq_hi_pr",
+ "wo_ha",
+ "wo_nm",
+ };
+
+ return names[atom];
+}
+
+enum ltl_buchi_state {
+ S0,
+ S1,
+ S2,
+ S3,
+ S4,
+ S5,
+ S6,
+ S7,
+ RV_NUM_BA_STATES
+};
+static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES);
+
+static void ltl_start(struct task_struct *task, struct ltl_monitor *mon)
+{
+ bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms);
+ bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms);
+ bool val35 = task_is_rcu || task_is_migration;
+ bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms);
+ bool val5 = block_on_rt_mutex || val35;
+ bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon->atoms);
+ bool woken_by_nmi = test_bit(LTL_WOKEN_BY_NMI, mon->atoms);
+ bool val29 = woken_by_nmi || kthread_should_stop;
+ bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms);
+ bool val30 = woken_by_hardirq || val29;
+ bool woken_by_equal_or_higher_prio = test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
+ mon->atoms);
+ bool val14 = woken_by_equal_or_higher_prio || val30;
+ bool wake = test_bit(LTL_WAKE, mon->atoms);
+ bool val13 = !wake;
+ bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms);
+ bool nanosleep_clock_monotonic = test_bit(LTL_NANOSLEEP_CLOCK_MONOTONIC, mon->atoms);
+ bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME, mon->atoms);
+ bool val23 = nanosleep_timer_abstime && nanosleep_clock_monotonic;
+ bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms);
+ bool val18 = clock_nanosleep && val23;
+ bool pi_futex = test_bit(LTL_PI_FUTEX, mon->atoms);
+ bool val9 = pi_futex || val18;
+ bool val11 = val9 || kernel_thread;
+ bool sleep = test_bit(LTL_SLEEP, mon->atoms);
+ bool val2 = !sleep;
+ bool rt = test_bit(LTL_RT, mon->atoms);
+ bool val1 = !rt;
+ bool val3 = val1 || val2;
+
+ if (val3)
+ __set_bit(S0, mon->states);
+ if (val11 && val13)
+ __set_bit(S1, mon->states);
+ if (val11 && val14)
+ __set_bit(S5, mon->states);
+ if (val5)
+ __set_bit(S6, mon->states);
+}
+
+static void
+ltl_possible_next_states(struct ltl_monitor *mon, unsigned int state, unsigned long *next)
+{
+ bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms);
+ bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms);
+ bool val35 = task_is_rcu || task_is_migration;
+ bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms);
+ bool val5 = block_on_rt_mutex || val35;
+ bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon->atoms);
+ bool woken_by_nmi = test_bit(LTL_WOKEN_BY_NMI, mon->atoms);
+ bool val29 = woken_by_nmi || kthread_should_stop;
+ bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms);
+ bool val30 = woken_by_hardirq || val29;
+ bool woken_by_equal_or_higher_prio = test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
+ mon->atoms);
+ bool val14 = woken_by_equal_or_higher_prio || val30;
+ bool wake = test_bit(LTL_WAKE, mon->atoms);
+ bool val13 = !wake;
+ bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms);
+ bool nanosleep_clock_monotonic = test_bit(LTL_NANOSLEEP_CLOCK_MONOTONIC, mon->atoms);
+ bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME, mon->atoms);
+ bool val23 = nanosleep_timer_abstime && nanosleep_clock_monotonic;
+ bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms);
+ bool val18 = clock_nanosleep && val23;
+ bool pi_futex = test_bit(LTL_PI_FUTEX, mon->atoms);
+ bool val9 = pi_futex || val18;
+ bool val11 = val9 || kernel_thread;
+ bool sleep = test_bit(LTL_SLEEP, mon->atoms);
+ bool val2 = !sleep;
+ bool rt = test_bit(LTL_RT, mon->atoms);
+ bool val1 = !rt;
+ bool val3 = val1 || val2;
+
+ switch (state) {
+ case S0:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S5, next);
+ if (val5)
+ __set_bit(S6, next);
+ break;
+ case S1:
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val13 && val3)
+ __set_bit(S2, next);
+ if (val13 && val5)
+ __set_bit(S3, next);
+ if (val14 && val3)
+ __set_bit(S4, next);
+ if (val11 && val14)
+ __set_bit(S5, next);
+ if (val14 && val5)
+ __set_bit(S7, next);
+ break;
+ case S2:
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val13 && val3)
+ __set_bit(S2, next);
+ if (val13 && val5)
+ __set_bit(S3, next);
+ if (val14 && val3)
+ __set_bit(S4, next);
+ if (val11 && val14)
+ __set_bit(S5, next);
+ if (val14 && val5)
+ __set_bit(S7, next);
+ break;
+ case S3:
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val13 && val3)
+ __set_bit(S2, next);
+ if (val13 && val5)
+ __set_bit(S3, next);
+ if (val14 && val3)
+ __set_bit(S4, next);
+ if (val11 && val14)
+ __set_bit(S5, next);
+ if (val14 && val5)
+ __set_bit(S7, next);
+ break;
+ case S4:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S5, next);
+ if (val5)
+ __set_bit(S6, next);
+ break;
+ case S5:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S5, next);
+ if (val5)
+ __set_bit(S6, next);
+ break;
+ case S6:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S5, next);
+ if (val5)
+ __set_bit(S6, next);
+ break;
+ case S7:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S5, next);
+ if (val5)
+ __set_bit(S6, next);
+ break;
+ }
+}
diff --git a/kernel/trace/rv/monitors/sleep/sleep_trace.h b/kernel/trace/rv/monitors/sleep/sleep_trace.h
new file mode 100644
index 000000000000..22eaf31da987
--- /dev/null
+++ b/kernel/trace/rv/monitors/sleep/sleep_trace.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_SLEEP
+DEFINE_EVENT(event_ltl_monitor_id, event_sleep,
+ TP_PROTO(struct task_struct *task, char *states, char *atoms, char *next),
+ TP_ARGS(task, states, atoms, next));
+DEFINE_EVENT(error_ltl_monitor_id, error_sleep,
+ TP_PROTO(struct task_struct *task),
+ TP_ARGS(task));
+#endif /* CONFIG_RV_MON_SLEEP */
diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
index 02c906c9745b..283d5c2fd055 100644
--- a/kernel/trace/rv/rv_trace.h
+++ b/kernel/trace/rv/rv_trace.h
@@ -173,6 +173,7 @@ TRACE_EVENT(error_ltl_monitor_id,
TP_printk("%s[%d]: violation detected", __get_str(comm), __entry->pid)
);
#include <monitors/pagefault/pagefault_trace.h>
+#include <monitors/sleep/sleep_trace.h>
// Add new monitors based on CONFIG_LTL_MON_EVENTS_ID here
#endif /* CONFIG_LTL_MON_EVENTS_ID */
#endif /* _TRACE_RV_H */
diff --git a/tools/verification/models/rtapp/sleep.ltl b/tools/verification/models/rtapp/sleep.ltl
new file mode 100644
index 000000000000..6e2f1ff31163
--- /dev/null
+++ b/tools/verification/models/rtapp/sleep.ltl
@@ -0,0 +1,20 @@
+RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or ALLOWLIST))
+
+RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD)
+ and ((not WAKE) until RT_FRIENDLY_WAKE)
+
+RT_VALID_SLEEP_REASON = PI_FUTEX
+ or RT_FRIENDLY_NANOSLEEP
+
+RT_FRIENDLY_NANOSLEEP = CLOCK_NANOSLEEP
+ and NANOSLEEP_TIMER_ABSTIME
+ and NANOSLEEP_CLOCK_MONOTONIC
+
+RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO
+ or WOKEN_BY_HARDIRQ
+ or WOKEN_BY_NMI
+ or KTHREAD_SHOULD_STOP
+
+ALLOWLIST = BLOCK_ON_RT_MUTEX
+ or TASK_IS_RCU
+ or TASK_IS_MIGRATION
--
2.39.5
On Tue, 2025-04-29 at 14:01 +0200, Nam Cao wrote: > Add a monitor for checking that real-time tasks do not go to sleep in > a > manner that may cause undesirable latency. > > Also change > RV depends on TRACING > to > RV select TRACING > to avoid the following recursive dependency: > > error: recursive dependency detected! > symbol TRACING is selected by PREEMPTIRQ_TRACEPOINTS > symbol PREEMPTIRQ_TRACEPOINTS depends on TRACE_IRQFLAGS > symbol TRACE_IRQFLAGS is selected by RV_MON_SLEEP > symbol RV_MON_SLEEP depends on RV > symbol RV depends on TRACING > > Reviewed-by: Gabriele Monaco <gmonaco@redhat.com> > Signed-off-by: Nam Cao <namcao@linutronix.de> > --- > Cc: Peter Zijlstra <peterz@infradead.org> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: Will Deacon <will@kernel.org> > Cc: Boqun Feng <boqun.feng@gmail.com> > Cc: Waiman Long <longman@redhat.com> > --- > > [...] > > +RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or > ALLOWLIST)) > + > +RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD) > + and ((not WAKE) until RT_FRIENDLY_WAKE) > + > +RT_VALID_SLEEP_REASON = PI_FUTEX > + or RT_FRIENDLY_NANOSLEEP > + > +RT_FRIENDLY_NANOSLEEP = CLOCK_NANOSLEEP > + and NANOSLEEP_TIMER_ABSTIME > + and NANOSLEEP_CLOCK_MONOTONIC > + > +RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO > + or WOKEN_BY_HARDIRQ > + or WOKEN_BY_NMI > + or KTHREAD_SHOULD_STOP > + > +ALLOWLIST = BLOCK_ON_RT_MUTEX > + or TASK_IS_RCU > + or TASK_IS_MIGRATION So, just thinking out loud, PI_FUTEX is a valid sleep reason, technically also BLOCK_ON_RT_MUTEX is something you are allowing. In my understanding, the contention tracepoints already in the kernel can track all contention by kernel code and are leaving aside the PI futexes, which use the untracked rt_mutex_wait_proxy_lock. In your case, you are tracking PI_FUTEX via the system call, which should cover the above scenario. Do you really need extra tracepoints to track this too? 
Or is there any other use of start_proxy_lock/wait_proxy_lock I'm missing here? I see the only case in which rt_mutex_start_proxy_lock is called with a task different than current is via FUTEX_CMP_REQUEUE_PI, wouldn't considering this one too make the new tracepoints superfluous (assuming this one is even needed to be tracked before FUTEX_WAIT_REQUEUE_PI). Thanks, Gabriele
On Tue, Apr 29, 2025 at 06:01:01PM +0200, Gabriele Monaco wrote:
> On Tue, 2025-04-29 at 14:01 +0200, Nam Cao wrote:
> > Add a monitor for checking that real-time tasks do not go to sleep in
> > a
> > manner that may cause undesirable latency.
> >
> > Also change
> > RV depends on TRACING
> > to
> > RV select TRACING
> > to avoid the following recursive dependency:
> >
> > error: recursive dependency detected!
> > symbol TRACING is selected by PREEMPTIRQ_TRACEPOINTS
> > symbol PREEMPTIRQ_TRACEPOINTS depends on TRACE_IRQFLAGS
> > symbol TRACE_IRQFLAGS is selected by RV_MON_SLEEP
> > symbol RV_MON_SLEEP depends on RV
> > symbol RV depends on TRACING
> >
> > Reviewed-by: Gabriele Monaco <gmonaco@redhat.com>
> > Signed-off-by: Nam Cao <namcao@linutronix.de>
> > ---
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Cc: Ingo Molnar <mingo@redhat.com>
> > Cc: Will Deacon <will@kernel.org>
> > Cc: Boqun Feng <boqun.feng@gmail.com>
> > Cc: Waiman Long <longman@redhat.com>
> > ---
> >
> > [...]
> >
> > +RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or
> > ALLOWLIST))
> > +
> > +RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD)
> > + and ((not WAKE) until RT_FRIENDLY_WAKE)
> > +
> > +RT_VALID_SLEEP_REASON = PI_FUTEX
> > + or RT_FRIENDLY_NANOSLEEP
> > +
> > +RT_FRIENDLY_NANOSLEEP = CLOCK_NANOSLEEP
> > + and NANOSLEEP_TIMER_ABSTIME
> > + and NANOSLEEP_CLOCK_MONOTONIC
> > +
> > +RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO
> > + or WOKEN_BY_HARDIRQ
> > + or WOKEN_BY_NMI
> > + or KTHREAD_SHOULD_STOP
> > +
> > +ALLOWLIST = BLOCK_ON_RT_MUTEX
> > + or TASK_IS_RCU
> > + or TASK_IS_MIGRATION
>
> So, just thinking out loud, PI_FUTEX is a valid sleep reason,
> technically also BLOCK_ON_RT_MUTEX is something you are allowing.
>
> In my understanding, the contention tracepoints already in the kernel
> can track all contention by kernel code and are leaving aside the PI
> futexes, which use the untracked rt_mutex_wait_proxy_lock.
>
> In your case, you are tracking PI_FUTEX via the system call, which
> should cover the above scenario.
>
> Do you really need extra tracepoints to track this too? Or is there any
> other use of start_proxy_lock/wait_proxy_lock I'm missing here?
>
> I see the only case in which rt_mutex_start_proxy_lock is called with a
> task different than current is via FUTEX_CMP_REQUEUE_PI, wouldn't
> considering this one too make the new tracepoints superfluous (assuming
> this one is even needed to be tracked before FUTEX_WAIT_REQUEUE_PI).
The monitor allows PI_FUTEX and allows BLOCK_ON_RT_MUTEX in different
manners.
PI_FUTEX is only a valid sleep reason. If a task sleeps with PI_FUTEX=true,
it still has to obey ((not WAKE) until RT_FRIENDLY_WAKE)
On the other hand, BLOCK_ON_RT_MUTEX alone is good enough. Waker is not
checked due to how rt_mutex is implemented: when a task unlocks an rt_mutex
and wakes the waiter, the task is priority-deboosted first before doing the
wakeup, and we would see a false positive warning. See
rt_mutex_slowunlock().
In the case of futex_lock_pi(), both PI_FUTEX and BLOCK_ON_RT_MUTEX are
true. Therefore we don't check the waker.
However, in the case of futex_wait_requeue_pi(), PI_FUTEX is true but
BLOCK_ON_RT_MUTEX is false. In this case, we check the waker.
So, what happens if we don't have the tracepoint in *_proxy_lock()? The
futex_lock_pi() may generate a false positive warning, because we check the
waker and the waker may have lower priority.
But now that you mention it, perhaps instead of PI_FUTEX, we could define
FUTEX_LOCK_PI and FUTEX_WAIT_REQUEUE_PI separately. And we don't check the
waker if FUTEX_LOCK_PI=true. Something like the diff below.
Then we wouldn't need the block_on_rt_mutex tracepoints anymore. And the
specification is a bit more obvious.
Having a second pair of eyes is great, thanks!
Nam
diff --git a/tools/verification/models/rtapp/sleep.ltl b/tools/verification/models/rtapp/sleep.ltl
index 6e2f1ff31163..1f26e58e72f8 100644
--- a/tools/verification/models/rtapp/sleep.ltl
+++ b/tools/verification/models/rtapp/sleep.ltl
@@ -3,7 +3,7 @@ RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or ALLOWLIST))
RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD)
and ((not WAKE) until RT_FRIENDLY_WAKE)
-RT_VALID_SLEEP_REASON = PI_FUTEX
+RT_VALID_SLEEP_REASON = FUTEX_WAIT_REQUEUE_PI
or RT_FRIENDLY_NANOSLEEP
RT_FRIENDLY_NANOSLEEP = CLOCK_NANOSLEEP
@@ -16,5 +16,6 @@ RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO
or KTHREAD_SHOULD_STOP
ALLOWLIST = BLOCK_ON_RT_MUTEX
+ or FUTEX_LOCK_PI
or TASK_IS_RCU
or TASK_IS_MIGRATION
On Tue, 2025-04-29 at 19:20 +0200, Nam Cao wrote: > On Tue, Apr 29, 2025 at 06:01:01PM +0200, Gabriele Monaco wrote: > > On Tue, 2025-04-29 at 14:01 +0200, Nam Cao wrote: > > > Add a monitor for checking that real-time tasks do not go to > > > sleep in > > > a > > > manner that may cause undesirable latency. > > > > > > Also change > > > RV depends on TRACING > > > to > > > RV select TRACING > > > to avoid the following recursive dependency: > > > > > > error: recursive dependency detected! > > > symbol TRACING is selected by PREEMPTIRQ_TRACEPOINTS > > > symbol PREEMPTIRQ_TRACEPOINTS depends on TRACE_IRQFLAGS > > > symbol TRACE_IRQFLAGS is selected by RV_MON_SLEEP > > > symbol RV_MON_SLEEP depends on RV > > > symbol RV depends on TRACING > > > > > > Reviewed-by: Gabriele Monaco <gmonaco@redhat.com> > > > Signed-off-by: Nam Cao <namcao@linutronix.de> > > > --- > > > Cc: Peter Zijlstra <peterz@infradead.org> > > > Cc: Ingo Molnar <mingo@redhat.com> > > > Cc: Will Deacon <will@kernel.org> > > > Cc: Boqun Feng <boqun.feng@gmail.com> > > > Cc: Waiman Long <longman@redhat.com> > > > --- > > > > > > [...] > > > > > > +RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or > > > ALLOWLIST)) > > > + > > > +RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD) > > > + and ((not WAKE) until RT_FRIENDLY_WAKE) > > > + > > > +RT_VALID_SLEEP_REASON = PI_FUTEX > > > + or RT_FRIENDLY_NANOSLEEP > > > + > > > +RT_FRIENDLY_NANOSLEEP = CLOCK_NANOSLEEP > > > + and NANOSLEEP_TIMER_ABSTIME > > > + and NANOSLEEP_CLOCK_MONOTONIC > > > + > > > +RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO > > > + or WOKEN_BY_HARDIRQ > > > + or WOKEN_BY_NMI > > > + or KTHREAD_SHOULD_STOP > > > + > > > +ALLOWLIST = BLOCK_ON_RT_MUTEX > > > + or TASK_IS_RCU > > > + or TASK_IS_MIGRATION > > > > So, just thinking out loud, PI_FUTEX is a valid sleep reason, > > technically also BLOCK_ON_RT_MUTEX is something you are allowing. 
> > > > In my understanding, the contention tracepoints already in the > > kernel > > can track all contention by kernel code and are leaving aside the > > PI > > futexes, which use the untracked rt_mutex_wait_proxy_lock. > > > > In your case, you are tracking PI_FUTEX via the system call, which > > should cover the above scenario. > > > > Do you really need extra tracepoints to track this too? Or is there > > any > > other use of start_proxy_lock/wait_proxy_lock I'm missing here? > > > > I see the only case in which rt_mutex_start_proxy_lock is called > > with a > > task different than current is via FUTEX_CMP_REQUEUE_PI, wouldn't > > considering this one too make the new tracepoints superfluous > > (assuming > > this one is even needed to be tracked before > > FUTEX_WAIT_REQUEUE_PI). > > The monitor allows PI_FUTEX and allows BLOCK_ON_RT_MUTEX in different > manners. > > PI_FUTEX is only a valid sleep reason. If a task sleeps with > PI_FUTEX=true, > it still has to obey ((not WAKE) until RT_FRIENDLY_WAKE) > > On the other hand, BLOCK_ON_RT_MUTEX alone is good enough. Waker is > not > checked due to how rt_mutex is implemented: when a task unlocks an > rt_mutex > and wakes waiter, the task is priority-deboosted first before doing > the > wakeup, and we would see a false positive warning. See > rt_mutex_slowunlock(). > > In the case of futex_lock_pi(), both PI_FUTEX and BLOCK_ON_RT_MUTEX > is > true. Therefore we don't check the waker. > > However, in the case of futex_wait_requeue_pi(), PI_FUTEX is true but > BLOCK_ON_RT_MUTEX is false. In this case, we check the waker. > > So, what happens if we don't have the tracepoint in *_proxy_lock()? > The > futex_lock_pi() may generate a false positive warning, because we > check the > waker and the waker may have lower priority. > > But now that you mention it, perhaps instead of PI_FUTEX, we could > define > FUTEX_LOCK_PI and FUTEX_WAIT_REQUEUE_PI separately. And we don't > check the > waker if FUTEX_LOCK_PI=true. 
Something like the diff below. > > Then we wouldn't need the block_on_rt_mutex tracepoints anymore. And > the > specification is a bit more obvious. > > Having a second pair of eyes is great, thanks! > > Nam > > diff --git a/tools/verification/models/rtapp/sleep.ltl > b/tools/verification/models/rtapp/sleep.ltl > index 6e2f1ff31163..1f26e58e72f8 100644 > --- a/tools/verification/models/rtapp/sleep.ltl > +++ b/tools/verification/models/rtapp/sleep.ltl > @@ -3,7 +3,7 @@ RULE = always ((RT and SLEEP) imply > (RT_FRIENDLY_SLEEP or ALLOWLIST)) > RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD) > and ((not WAKE) until RT_FRIENDLY_WAKE) > > -RT_VALID_SLEEP_REASON = PI_FUTEX > +RT_VALID_SLEEP_REASON = FUTEX_WAIT_REQUEUE_PI > or RT_FRIENDLY_NANOSLEEP > > RT_FRIENDLY_NANOSLEEP = CLOCK_NANOSLEEP > @@ -16,5 +16,6 @@ RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO > or KTHREAD_SHOULD_STOP > > ALLOWLIST = BLOCK_ON_RT_MUTEX > + or FUTEX_LOCK_PI > or TASK_IS_RCU > or TASK_IS_MIGRATION Well, it was more complicated than I pictured it, glad my babbling helped in finding another solution ;) I've got one more small remark though, the name ALLOWLIST doesn't give justice to what you explained above. It suggests me something potentially wrong that we can't do much about, while in case of RT mutexes and futex lock, we just don't want the monitor to yell in a perfectly RT-compliant scenario. What is happening here, from what I see, is that the kernel is handling the RT behaviour and your monitor is just meant to tell when userspace is doing something it could do better (unless we deal with kthreads, there we are in fact whitelisting the ones we know are not complying). What about calling it RT_KERNEL_MANAGED_SLEEP or something along the line to say we just trust what the kernel is doing? Does this make sense to you? Thanks, Gabriele
On Wed, Apr 30, 2025 at 10:05:07AM +0200, Gabriele Monaco wrote: > I've got one more small remark though, the name ALLOWLIST doesn't give > justice to what you explained above. > It suggests me something potentially wrong that we can't do much about, > while in case of RT mutexes and futex lock, we just don't want the > monitor to yell in a perfectly RT-compliant scenario. > > What is happening here, from what I see, is that the kernel is handling > the RT behaviour and your monitor is just meant to tell when userspace > is doing something it could do better (unless we deal with kthreads, > there we are in fact whitelisting the ones we know are not complying). > > What about calling it RT_KERNEL_MANAGED_SLEEP or something along the > line to say we just trust what the kernel is doing? That would also work. The generated automaton should be exactly the same. But I think this is quite subjective, so let's not argue too much about it. In short, I prefer it as is, sorry. I think "ALLOWLIST" is a suitable name. From Wikipedia: "A whitelist or allowlist is a list or register of entities that are being provided a particular privilege, service, mobility, access or recognition. Entities on the list will be accepted, approved and/or recognized". We trust rt_mutex and futex_lock_pi to do the right things, so I think they belong to the allowlist. We also trust the RCU thread and the migration/ threads to be correct. Best regards, Nam
© 2016 - 2026 Red Hat, Inc.