Documentation/trace/events.rst | 25 ++++ kernel/trace/trace.c | 11 +- kernel/trace/trace.h | 15 +- kernel/trace/trace_events.c | 70 +++++----- kernel/trace/trace_events_trigger.c | 205 +++++++++++++++++++++++++++- 5 files changed, 284 insertions(+), 42 deletions(-)
From: Steven Rostedt <rostedt@goodmis.org>
Currently a trigger can only be added to individual events. Some triggers
(like stacktrace) can be useful to add as a bulk trigger for a set of
system events (like interrupt or scheduling).
Add a trigger file to the system directories:
/sys/kernel/tracing/events/*/trigger
And allow stacktrace trigger to be enabled for all those events.
Writing into the system/trigger file acts the same as writing into each of
the system event's trigger files individually.
This also allows to remove a trigger from all events in a subsystem (even
if it's not a subsystem trigger!).
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
Note, this is based on top of:
https://patchwork.kernel.org/project/linux-trace-kernel/cover/20251120205600.570673392@kernel.org/
Documentation/trace/events.rst | 25 ++++
kernel/trace/trace.c | 11 +-
kernel/trace/trace.h | 15 +-
kernel/trace/trace_events.c | 70 +++++-----
kernel/trace/trace_events_trigger.c | 205 +++++++++++++++++++++++++++-
5 files changed, 284 insertions(+), 42 deletions(-)
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 18d112963dec..caa4958af43a 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -416,6 +416,31 @@ way, so beware about making generalizations between the two.
can also enable triggers that are written into
/sys/kernel/tracing/events/ftrace/print/trigger
+The system directory also has a trigger file that allows some triggers to be
+set for all the system's events. This is limited to only a small subset of the
+triggers and does not allow for the count parameter. But it does allow for
+filters. Writing into this file is the same as writing into each of the
+system's event's trigger files individually. Although only a subset of
+triggers may use this file for enabling, all triggers may use this file for
+disabling::
+
+ cd /sys/kernel/tracing
+ cat events/sched/trigger
+ # Available system triggers:
+ # stacktrace
+
+ echo stacktrace > events/sched/trigger
+ cat events/sched/sched_switch/trigger
+ stacktrace:unlimited
+
+ echo snapshot > events/sched/sched_waking/trigger
+ cat events/sched/sched_waking/trigger
+ snapshot:unlimited
+ echo '!snapshot' > events/sched/trigger
+ cat events/sched/sched_waking/trigger
+ # Available triggers:
+ # traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist
+
6.1 Expression syntax
---------------------
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 59cd4ed8af6d..d400c013d42b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -592,11 +592,12 @@ void trace_set_ring_buffer_expanded(struct trace_array *tr)
LIST_HEAD(ftrace_trace_arrays);
-int trace_array_get(struct trace_array *this_tr)
+int __trace_array_get(struct trace_array *this_tr)
{
struct trace_array *tr;
- guard(mutex)(&trace_types_lock);
+ lockdep_assert_held(&trace_types_lock);
+
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
if (tr == this_tr) {
tr->ref++;
@@ -607,6 +608,12 @@ int trace_array_get(struct trace_array *this_tr)
return -ENODEV;
}
+int trace_array_get(struct trace_array *tr)
+{
+ guard(mutex)(&trace_types_lock);
+ return __trace_array_get(tr);
+}
+
static void __trace_array_put(struct trace_array *this_tr)
{
WARN_ON(!this_tr->ref);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd5a6daa6c25..7379763a057d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -469,10 +469,14 @@ extern struct list_head ftrace_trace_arrays;
extern struct mutex trace_types_lock;
extern int trace_array_get(struct trace_array *tr);
+extern int __trace_array_get(struct trace_array *tr);
extern int tracing_check_open_get_tr(struct trace_array *tr);
extern struct trace_array *trace_array_find(const char *instance);
extern struct trace_array *trace_array_find_get(const char *instance);
+extern struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode);
+void trace_put_system_dir(struct trace_subsystem_dir *dir);
+
extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe);
extern int tracing_set_filter_buffering(struct trace_array *tr, bool set);
extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
@@ -1774,6 +1778,7 @@ static inline struct trace_event_file *event_file_file(struct file *filp)
}
extern const struct file_operations event_trigger_fops;
+extern const struct file_operations event_system_trigger_fops;
extern const struct file_operations event_hist_fops;
extern const struct file_operations event_hist_debug_fops;
extern const struct file_operations event_inject_fops;
@@ -2057,10 +2062,16 @@ struct event_command {
* regardless of whether or not it has a filter associated with
* it (filters make a trigger require access to the trace record
* but are not always present).
+ *
+ * @SYSTEM: A flag that says whether or not this command can be used
+ * at the event system level. For example, can it be written into
+ * events/sched/trigger file where it will be enabled for all
+ * sched events?
*/
enum event_command_flags {
- EVENT_CMD_FL_POST_TRIGGER = 1,
- EVENT_CMD_FL_NEEDS_REC = 2,
+ EVENT_CMD_FL_POST_TRIGGER = BIT(1),
+ EVENT_CMD_FL_NEEDS_REC = BIT(2),
+ EVENT_CMD_FL_SYSTEM = BIT(3),
};
static inline bool event_command_post_trigger(struct event_command *cmd_ops)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9b07ad9eb284..f00b41f73fc2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2168,51 +2168,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
static LIST_HEAD(event_subsystems);
-static int subsystem_open(struct inode *inode, struct file *filp)
+struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode)
{
- struct trace_subsystem_dir *dir = NULL, *iter_dir;
- struct trace_array *tr = NULL, *iter_tr;
- struct event_subsystem *system = NULL;
- int ret;
+ struct trace_subsystem_dir *dir;
+ struct trace_array *tr = NULL;
- if (tracing_is_disabled())
- return -ENODEV;
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trace_types_lock);
/* Make sure the system still exists */
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
- list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
- list_for_each_entry(iter_dir, &iter_tr->systems, list) {
- if (iter_dir == inode->i_private) {
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ list_for_each_entry(dir, &tr->systems, list) {
+ if (dir == inode->i_private) {
/* Don't open systems with no events */
- tr = iter_tr;
- dir = iter_dir;
- if (dir->nr_events) {
- __get_system_dir(dir);
- system = dir->subsystem;
- }
- goto exit_loop;
+ if (!dir->nr_events)
+ return NULL;
+ if (__trace_array_get(tr) < 0)
+ return NULL;
+ __get_system_dir(dir);
+ return dir;
}
}
}
- exit_loop:
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
+ return NULL;
+}
- if (!system)
+void trace_put_system_dir(struct trace_subsystem_dir *dir)
+{
+ trace_array_put(dir->tr);
+ put_system(dir);
+}
+
+static int subsystem_open(struct inode *inode, struct file *filp)
+{
+ struct trace_subsystem_dir *dir;
+ int ret;
+
+ if (tracing_is_disabled())
return -ENODEV;
- /* Still need to increment the ref count of the system */
- if (trace_array_get(tr) < 0) {
- put_system(dir);
+ dir = trace_get_system_dir(inode);
+ if (!dir)
return -ENODEV;
- }
ret = tracing_open_generic(inode, filp);
- if (ret < 0) {
- trace_array_put(tr);
- put_system(dir);
- }
+ if (ret < 0)
+ trace_put_system_dir(dir);
return ret;
}
@@ -2761,6 +2762,9 @@ static int system_callback(const char *name, umode_t *mode, void **data,
else if (strcmp(name, "enable") == 0)
*fops = &ftrace_system_enable_fops;
+ else if (strcmp(name, "trigger") == 0)
+ *fops = &event_system_trigger_fops;
+
else
return 0;
@@ -2784,6 +2788,10 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
{
.name = "enable",
.callback = system_callback,
+ },
+ {
+ .name = "trigger",
+ .callback = system_callback,
}
};
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 1dfe69146a81..b621406054b7 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -329,21 +329,28 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
return -EINVAL;
}
+static char *get_user_buf(const char __user *ubuf, size_t cnt)
+{
+ if (!cnt)
+ return NULL;
+
+ if (cnt >= PAGE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ return memdup_user_nul(ubuf, cnt);
+}
+
static ssize_t event_trigger_regex_write(struct file *file,
const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_event_file *event_file;
ssize_t ret;
- char *buf __free(kfree) = NULL;
+ char *buf __free(kfree) = get_user_buf(ubuf, cnt);
- if (!cnt)
+ if (!buf)
return 0;
- if (cnt >= PAGE_SIZE)
- return -EINVAL;
-
- buf = memdup_user_nul(ubuf, cnt);
if (IS_ERR(buf))
return PTR_ERR(buf);
@@ -397,6 +404,190 @@ const struct file_operations event_trigger_fops = {
.release = event_trigger_release,
};
+static ssize_t
+event_system_trigger_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ char *buf __free(kfree) = kmalloc(SZ_4K, GFP_KERNEL);
+ struct event_command *p;
+ struct seq_buf s;
+ int len;
+
+ if (!buf)
+ return -ENOMEM;
+
+ seq_buf_init(&s, buf, SZ_4K);
+
+ seq_buf_puts(&s, "# Available system triggers:\n");
+ seq_buf_putc(&s, '#');
+
+ guard(mutex)(&trigger_cmd_mutex);
+ list_for_each_entry_reverse(p, &trigger_commands, list) {
+ if (p->flags & EVENT_CMD_FL_SYSTEM)
+ seq_buf_printf(&s, " %s", p->name);
+ }
+ seq_buf_putc(&s, '\n');
+
+ len = seq_buf_used(&s);
+
+ if (*ppos >= len)
+ return 0;
+
+ len -= *ppos;
+
+ if (count > len)
+ count = len;
+
+ if (copy_to_user(ubuf, buf + *ppos, count))
+ return -EFAULT;
+
+ *ppos += count;
+
+ return count;
+}
+
+static int process_system_events(struct trace_subsystem_dir *dir,
+ struct event_command *p, char *buff,
+ char *command, char *next)
+{
+ struct event_subsystem *system = dir->subsystem;
+ struct trace_event_file *file;
+ struct trace_array *tr = dir->tr;
+ bool remove = false;
+ int ret = 0;
+
+ if (buff[0] == '!')
+ remove = true;
+
+ lockdep_assert_held(&event_mutex);
+
+ list_for_each_entry(file, &tr->events, list) {
+
+ if (strcmp(system->name, file->event_call->class->system) != 0)
+ continue;
+
+ ret = p->parse(p, file, buff, command, next);
+
+ /* Removals and existing events do not error */
+ if (ret < 0 && ret != -EEXIST && !remove) {
+ pr_warn("Failed adding trigger %s on %s\n",
+ command, trace_event_name(file->event_call));
+ }
+ }
+ return 0;
+}
+
+static ssize_t
+event_system_trigger_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_subsystem_dir *dir = filp->private_data;
+ struct event_command *p;
+ char *command, *next;
+ char *buf __free(kfree) = get_user_buf(ubuf, cnt);
+ bool remove = false;
+ bool found = false;
+ ssize_t ret;
+ int len;
+
+ if (!buf)
+ return 0;
+
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ /* system triggers are not allowed to have counters */
+ if (strchr(buf, ':'))
+ return -EINVAL;
+
+ /* If opened for read too, dir is in the seq_file descriptor */
+ if (filp->f_mode & FMODE_READ) {
+ struct seq_file *m = filp->private_data;
+ dir = m->private;
+ }
+
+ /* Skip added space at beginning of buf */
+ next = buf;
+ strim(next);
+
+ command = strsep(&next, " \t");
+ if (next) {
+ next = skip_spaces(next);
+ if (!*next)
+ next = NULL;
+ }
+ if (command[0] == '!') {
+ remove = true;
+ command++;
+ }
+
+ len = strlen(command);
+ if (next)
+ len += strlen(next) + 1;
+
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trigger_cmd_mutex);
+
+ list_for_each_entry(p, &trigger_commands, list) {
+ /* Allow to remove any trigger */
+ if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM))
+ continue;
+ if (strcmp(p->name, command) == 0) {
+ found = true;
+ ret = process_system_events(dir, p, buf, command, next);
+ break;
+ }
+ }
+
+ if (!found)
+ ret = -ENODEV;
+
+ if (!ret)
+ *ppos += cnt;
+
+ if (remove || ret < 0)
+ return ret ? : cnt;
+
+ return cnt;
+}
+
+static int
+event_system_trigger_open(struct inode *inode, struct file *file)
+{
+ struct trace_subsystem_dir *dir;
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
+ dir = trace_get_system_dir(inode);
+ if (!dir)
+ return -ENODEV;
+
+ file->private_data = dir;
+
+ return ret;
+}
+
+static int
+event_system_trigger_release(struct inode *inode, struct file *file)
+{
+ struct trace_subsystem_dir *dir = inode->i_private;
+
+ trace_put_system_dir(dir);
+
+ return 0;
+}
+
+const struct file_operations event_system_trigger_fops = {
+ .open = event_system_trigger_open,
+ .read = event_system_trigger_read,
+ .write = event_system_trigger_write,
+ .llseek = tracing_lseek,
+ .release = event_system_trigger_release,
+};
+
/*
* Currently we only register event commands from __init, so mark this
* __init too.
@@ -1587,7 +1778,7 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data)
static struct event_command trigger_stacktrace_cmd = {
.name = "stacktrace",
.trigger_type = ETT_STACKTRACE,
- .flags = EVENT_CMD_FL_POST_TRIGGER,
+ .flags = EVENT_CMD_FL_POST_TRIGGER | EVENT_CMD_FL_SYSTEM,
.parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
--
2.51.0
Hi Steven,
kernel test robot noticed the following build warnings:
[auto build test WARNING on trace/for-next]
[also build test WARNING on linus/master v6.18-rc6 next-20251121]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Steven-Rostedt/tracing-Add-system-trigger-file-to-enable-triggers-for-all-the-system-s-events/20251121-050454
base: https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace for-next
patch link: https://lore.kernel.org/r/20251120160003.2fa33d80%40gandalf.local.home
patch subject: [PATCH] tracing: Add system trigger file to enable triggers for all the system's events
config: sparc64-defconfig (https://download.01.org/0day-ci/archive/20251122/202511220730.PQFWRcck-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251122/202511220730.PQFWRcck-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511220730.PQFWRcck-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> kernel/trace/trace_events_trigger.c:432:6: warning: variable 'len' set but not used [-Wunused-but-set-variable]
432 | int len;
| ^
1 warning generated.
vim +/len +432 kernel/trace/trace_events_trigger.c
420
421 static ssize_t
422 event_system_trigger_write(struct file *filp, const char __user *ubuf,
423 size_t cnt, loff_t *ppos)
424 {
425 struct trace_subsystem_dir *dir = filp->private_data;
426 struct event_command *p;
427 char *command, *next;
428 char *buf __free(kfree) = get_user_buf(ubuf, cnt);
429 bool remove = false;
430 bool found = false;
431 ssize_t ret;
> 432 int len;
433
434 if (!buf)
435 return 0;
436
437 if (IS_ERR(buf))
438 return PTR_ERR(buf);
439
440 /* system triggers are not allowed to have counters */
441 if (strchr(buf, ':'))
442 return -EINVAL;
443
444 /* If opened for read too, dir is in the seq_file descriptor */
445 if (filp->f_mode & FMODE_READ) {
446 struct seq_file *m = filp->private_data;
447 dir = m->private;
448 }
449
450 /* Skip added space at beginning of buf */
451 next = buf;
452 strim(next);
453
454 command = strsep(&next, " \t");
455 if (next) {
456 next = skip_spaces(next);
457 if (!*next)
458 next = NULL;
459 }
460 if (command[0] == '!') {
461 remove = true;
462 command++;
463 }
464
465 len = strlen(command);
466 if (next)
467 len += strlen(next) + 1;
468
469 guard(mutex)(&event_mutex);
470 guard(mutex)(&trigger_cmd_mutex);
471
472 list_for_each_entry(p, &trigger_commands, list) {
473 /* Allow to remove any trigger */
474 if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM))
475 continue;
476 if (strcmp(p->name, command) == 0) {
477 found = true;
478 ret = process_system_events(dir, p, buf, command, next);
479 break;
480 }
481 }
482
483 if (!found)
484 ret = -ENODEV;
485
486 if (!ret)
487 *ppos += cnt;
488
489 if (remove || ret < 0)
490 return ret ? : cnt;
491
492 return cnt;
493 }
494
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.