[PATCH v4 2/4] tracing: Make the backup instance non-reusable

Masami Hiramatsu (Google) posted 4 patches 3 weeks ago
There is a newer version of this series
[PATCH v4 2/4] tracing: Make the backup instance non-reusable
Posted by Masami Hiramatsu (Google) 3 weeks ago
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Since there is no reason to reuse the backup instance, make it
readonly (but erasable).
Note that only backup instances are readonly, because any
other trace instance would stay empty unless it is writable.
Only backup instances hold entries copied from the original.

With this change, most of the trace control files are removed
from the backup instance, including eventfs enable/filter etc.

 # find /sys/kernel/tracing/instances/backup/events/ | wc -l
 4093
 # find /sys/kernel/tracing/instances/boot_map/events/ | wc -l
 9573

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 Changes in v4:
  - Make trace data erasable. (not reusable)
 Changes in v3:
  - Reuse the beginning part of event_entries for readonly files.
  - Remove readonly file_operations and checking readonly flag in
    each write operation.
 Changes in v2:
  - Use readonly file_operations to prohibit writing instead of
    checking flags in write() callbacks.
  - Remove writable files from eventfs.
---
 kernel/trace/trace.c        |   93 ++++++++++++++++++++++++++++++-------------
 kernel/trace/trace.h        |    8 +++-
 kernel/trace/trace_boot.c   |    5 +-
 kernel/trace/trace_events.c |   68 +++++++++++++++++++------------
 4 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5ddaeced9cb3..b27e1cdeffb0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5034,6 +5034,11 @@ static ssize_t
 tracing_write_stub(struct file *filp, const char __user *ubuf,
 		   size_t count, loff_t *ppos)
 {
+	struct trace_array *tr = file_inode(filp)->i_private;
+
+	if (trace_array_is_readonly(tr))
+		return -EPERM;
+
 	return count;
 }
 
@@ -5134,6 +5139,9 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	cpumask_var_t tracing_cpumask_new;
 	int err;
 
+	if (trace_array_is_readonly(tr))
+		return -EPERM;
+
 	if (count == 0 || count > KMALLOC_MAX_SIZE)
 		return -EINVAL;
 
@@ -6418,6 +6426,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 	size_t ret;
 	int err;
 
+	if (trace_array_is_readonly(tr))
+		return -EPERM;
+
 	ret = cnt;
 
 	if (cnt > MAX_TRACER_SIZE)
@@ -7052,6 +7063,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 	unsigned long val;
 	int ret;
 
+	if (trace_array_is_readonly(tr))
+		return -EPERM;
+
 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
 	if (ret)
 		return ret;
@@ -7806,6 +7820,9 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 	const char *clockstr;
 	int ret;
 
+	if (trace_array_is_readonly(tr))
+		return -EPERM;
+
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
 
@@ -9360,12 +9377,16 @@ static void
 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
 {
 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
+	umode_t writable_mode = TRACE_MODE_WRITE;
 	struct dentry *d_cpu;
 	char cpu_dir[30]; /* 30 characters should be more than enough */
 
 	if (!d_percpu)
 		return;
 
+	if (trace_array_is_readonly(tr))
+		writable_mode = TRACE_MODE_READ;
+
 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
 	if (!d_cpu) {
@@ -9588,7 +9609,6 @@ struct dentry *trace_create_file(const char *name,
 	return ret;
 }
 
-
 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
 {
 	struct dentry *d_tracer;
@@ -9818,6 +9838,9 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
 	unsigned long val;
 	int ret;
 
+	if (trace_array_is_readonly(tr))
+		return -EPERM;
+
 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
 	if (ret)
 		return ret;
@@ -9924,6 +9947,9 @@ buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
 	int pages;
 	int ret;
 
+	if (trace_array_is_readonly(tr))
+		return -EPERM;
+
 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
 	if (ret)
 		return ret;
@@ -10604,17 +10630,23 @@ static __init void create_trace_instances(struct dentry *d_tracer)
 static void
 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 {
+	umode_t writable_mode = TRACE_MODE_WRITE;
+	bool readonly = trace_array_is_readonly(tr);
 	int cpu;
 
+	if (readonly)
+		writable_mode = TRACE_MODE_READ;
+
 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
-			tr, &show_traces_fops);
+			  tr, &show_traces_fops);
 
-	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
-			tr, &set_tracer_fops);
+	trace_create_file("current_tracer", writable_mode, d_tracer,
+			  tr, &set_tracer_fops);
 
-	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
+	trace_create_file("tracing_cpumask", writable_mode, d_tracer,
 			  tr, &tracing_cpumask_fops);
 
+	/* Options are used for changing print-format even for readonly instance. */
 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
 			  tr, &tracing_iter_fops);
 
@@ -10624,27 +10656,35 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
 			  tr, &tracing_pipe_fops);
 
-	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
+	trace_create_file("buffer_size_kb", writable_mode, d_tracer,
 			  tr, &tracing_entries_fops);
 
 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
 			  tr, &tracing_total_entries_fops);
 
-	trace_create_file("free_buffer", 0200, d_tracer,
-			  tr, &tracing_free_buffer_fops);
+	if (!readonly) {
+		trace_create_file("free_buffer", 0200, d_tracer,
+				tr, &tracing_free_buffer_fops);
 
-	trace_create_file("trace_marker", 0220, d_tracer,
-			  tr, &tracing_mark_fops);
+		trace_create_file("trace_marker", 0220, d_tracer,
+				tr, &tracing_mark_fops);
 
-	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
+		tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
 
-	trace_create_file("trace_marker_raw", 0220, d_tracer,
-			  tr, &tracing_mark_raw_fops);
+		trace_create_file("trace_marker_raw", 0220, d_tracer,
+				tr, &tracing_mark_raw_fops);
 
-	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
+		trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
+				tr, &buffer_percent_fops);
+
+		trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
+				tr, &tracing_syscall_buf_fops);
+	}
+
+	trace_create_file("trace_clock", writable_mode, d_tracer, tr,
 			  &trace_clock_fops);
 
-	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
+	trace_create_file("tracing_on", writable_mode, d_tracer,
 			  tr, &rb_simple_fops);
 
 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
@@ -10652,41 +10692,38 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 
 	tr->buffer_percent = 50;
 
-	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
-			tr, &buffer_percent_fops);
-
-	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
+	trace_create_file("buffer_subbuf_size_kb", writable_mode, d_tracer,
 			  tr, &buffer_subbuf_size_fops);
 
-	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
-			 tr, &tracing_syscall_buf_fops);
-
 	create_trace_options_dir(tr);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-	trace_create_maxlat_file(tr, d_tracer);
+	if (!readonly)
+		trace_create_maxlat_file(tr, d_tracer);
 #endif
 
-	if (ftrace_create_function_files(tr, d_tracer))
+	if (!readonly && ftrace_create_function_files(tr, d_tracer))
 		MEM_FAIL(1, "Could not allocate function filter files");
 
 	if (tr->range_addr_start) {
 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
 				  tr, &last_boot_fops);
 #ifdef CONFIG_TRACER_SNAPSHOT
-	} else {
+	} else if (!readonly) {
 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
 				  tr, &snapshot_fops);
 #endif
 	}
 
-	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
-			  tr, &tracing_err_log_fops);
+	if (!readonly)
+		trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
+				  tr, &tracing_err_log_fops);
 
 	for_each_tracing_cpu(cpu)
 		tracing_init_tracefs_percpu(tr, cpu);
 
-	ftrace_init_tracefs(tr, d_tracer);
+	if (!readonly)
+		ftrace_init_tracefs(tr, d_tracer);
 }
 
 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b6d42fe06115..a098011951cc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -33,6 +33,7 @@
 
 #define TRACE_MODE_WRITE	0640
 #define TRACE_MODE_READ		0440
+#define TRACE_MODE_WRITE_MASK	(TRACE_MODE_WRITE & ~TRACE_MODE_READ)
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -483,6 +484,12 @@ extern bool trace_clock_in_ns(struct trace_array *tr);
 
 extern unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr);
 
+static inline bool trace_array_is_readonly(struct trace_array *tr)
+{
+	/* backup instance is read only. */
+	return tr->flags & TRACE_ARRAY_FL_VMALLOC;
+}
+
 /*
  * The global tracer (top) should be the first trace array added,
  * but we check the flag anyway.
@@ -681,7 +688,6 @@ struct dentry *trace_create_file(const char *name,
 				 void *data,
 				 const struct file_operations *fops);
 
-
 /**
  * tracer_tracing_is_on_cpu - show real state of ring buffer enabled on for a cpu
  * @tr : the trace array to know if ring buffer is enabled
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index dbe29b4c6a7a..2ca2541c8a58 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -61,7 +61,8 @@ trace_boot_set_instance_options(struct trace_array *tr, struct xbc_node *node)
 		v = memparse(p, NULL);
 		if (v < PAGE_SIZE)
 			pr_err("Buffer size is too small: %s\n", p);
-		if (tracing_resize_ring_buffer(tr, v, RING_BUFFER_ALL_CPUS) < 0)
+		if (trace_array_is_readonly(tr) ||
+		    tracing_resize_ring_buffer(tr, v, RING_BUFFER_ALL_CPUS) < 0)
 			pr_err("Failed to resize trace buffer to %s\n", p);
 	}
 
@@ -597,7 +598,7 @@ trace_boot_enable_tracer(struct trace_array *tr, struct xbc_node *node)
 
 	p = xbc_node_find_value(node, "tracer", NULL);
 	if (p && *p != '\0') {
-		if (tracing_set_tracer(tr, p) < 0)
+		if (trace_array_is_readonly(tr) || tracing_set_tracer(tr, p) < 0)
 			pr_err("Failed to set given tracer: %s\n", p);
 	}
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9b07ad9eb284..5a9e03470b03 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1379,6 +1379,9 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
 {
 	int ret;
 
+	if (trace_array_is_readonly(tr))
+		return -EPERM;
+
 	mutex_lock(&event_mutex);
 	ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set, mod);
 	mutex_unlock(&event_mutex);
@@ -2817,8 +2820,8 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
 	} else
 		__get_system(system);
 
-	/* ftrace only has directories no files */
-	if (strcmp(name, "ftrace") == 0)
+	/* ftrace only has directories no files, readonly instance too. */
+	if (strcmp(name, "ftrace") == 0 || trace_array_is_readonly(tr))
 		nr_entries = 0;
 	else
 		nr_entries = ARRAY_SIZE(system_entries);
@@ -2983,28 +2986,30 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
 	int ret;
 	static struct eventfs_entry event_entries[] = {
 		{
-			.name		= "enable",
+			.name		= "format",
 			.callback	= event_callback,
-			.release	= event_release,
 		},
+#ifdef CONFIG_PERF_EVENTS
 		{
-			.name		= "filter",
+			.name		= "id",
 			.callback	= event_callback,
 		},
+#endif
+#define NR_RO_EVENT_ENTRIES	(1 + IS_ENABLED(CONFIG_PERF_EVENTS))
+/* Readonly files must be above this line and counted by NR_RO_EVENT_ENTRIES. */
 		{
-			.name		= "trigger",
+			.name		= "enable",
 			.callback	= event_callback,
+			.release	= event_release,
 		},
 		{
-			.name		= "format",
+			.name		= "filter",
 			.callback	= event_callback,
 		},
-#ifdef CONFIG_PERF_EVENTS
 		{
-			.name		= "id",
+			.name		= "trigger",
 			.callback	= event_callback,
 		},
-#endif
 #ifdef CONFIG_HIST_TRIGGERS
 		{
 			.name		= "hist",
@@ -3037,9 +3042,13 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
 	if (!e_events)
 		return -ENOMEM;
 
-	nr_entries = ARRAY_SIZE(event_entries);
+	if (trace_array_is_readonly(tr))
+		nr_entries = NR_RO_EVENT_ENTRIES;
+	else
+		nr_entries = ARRAY_SIZE(event_entries);
 
 	name = trace_event_name(call);
+
 	ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
 	if (IS_ERR(ei)) {
 		pr_warn("Could not create tracefs '%s' directory\n", name);
@@ -4381,25 +4390,25 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
 	int nr_entries;
 	static struct eventfs_entry events_entries[] = {
 		{
-			.name		= "enable",
+			.name		= "header_page",
 			.callback	= events_callback,
 		},
 		{
-			.name		= "header_page",
+			.name		= "header_event",
 			.callback	= events_callback,
 		},
+#define NR_RO_TOP_ENTRIES	2
+/* Readonly files must be above this line and counted by NR_RO_TOP_ENTRIES. */
 		{
-			.name		= "header_event",
+			.name		= "enable",
 			.callback	= events_callback,
 		},
 	};
 
-	entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
-				  tr, &ftrace_set_event_fops);
-	if (!entry)
-		return -ENOMEM;
-
-	nr_entries = ARRAY_SIZE(events_entries);
+	if (trace_array_is_readonly(tr))
+		nr_entries = NR_RO_TOP_ENTRIES;
+	else
+		nr_entries = ARRAY_SIZE(events_entries);
 
 	e_events = eventfs_create_events_dir("events", parent, events_entries,
 					     nr_entries, tr);
@@ -4408,15 +4417,22 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
 		return -ENOMEM;
 	}
 
-	/* There are not as crucial, just warn if they are not created */
+	if (!trace_array_is_readonly(tr)) {
 
-	trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
-			  tr, &ftrace_set_event_pid_fops);
+		entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
+					tr, &ftrace_set_event_fops);
+		if (!entry)
+			return -ENOMEM;
+
+		/* There are not as crucial, just warn if they are not created */
 
-	trace_create_file("set_event_notrace_pid",
-			  TRACE_MODE_WRITE, parent, tr,
-			  &ftrace_set_event_notrace_pid_fops);
+		trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
+				tr, &ftrace_set_event_pid_fops);
 
+		trace_create_file("set_event_notrace_pid",
+				TRACE_MODE_WRITE, parent, tr,
+				&ftrace_set_event_notrace_pid_fops);
+	}
 	tr->event_dir = e_events;
 
 	return 0;
Re: [PATCH v4 2/4] tracing: Make the backup instance non-reusable
Posted by kernel test robot 3 weeks ago
Hi Masami,

kernel test robot noticed the following build warnings:

[auto build test WARNING on trace/for-next]
[also build test WARNING on linus/master v6.19-rc6 next-20260119]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Masami-Hiramatsu-Google/tracing-Reset-last_boot_info-if-ring-buffer-is-reset/20260120-091429
base:   https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace for-next
patch link:    https://lore.kernel.org/r/176887137556.578403.17994205756247311821.stgit%40mhiramat.tok.corp.google.com
patch subject: [PATCH v4 2/4] tracing: Make the backup instance non-reusable
config: arc-defconfig (https://download.01.org/0day-ci/archive/20260120/202601201531.ng4kqZhn-lkp@intel.com/config)
compiler: arc-linux-gcc (GCC) 15.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260120/202601201531.ng4kqZhn-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601201531.ng4kqZhn-lkp@intel.com/

All warnings (new ones prefixed by >>):

   kernel/trace/trace.c: In function 'tracing_init_tracefs_percpu':
>> kernel/trace/trace.c:9398:17: warning: variable 'writable_mode' set but not used [-Wunused-but-set-variable]
    9398 |         umode_t writable_mode = TRACE_MODE_WRITE;
         |                 ^~~~~~~~~~~~~


vim +/writable_mode +9398 kernel/trace/trace.c

  9393	
  9394	static void
  9395	tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
  9396	{
  9397		struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
> 9398		umode_t writable_mode = TRACE_MODE_WRITE;
  9399		struct dentry *d_cpu;
  9400		char cpu_dir[30]; /* 30 characters should be more than enough */
  9401	
  9402		if (!d_percpu)
  9403			return;
  9404	
  9405		if (trace_array_is_readonly(tr))
  9406			writable_mode = TRACE_MODE_READ;
  9407	
  9408		snprintf(cpu_dir, 30, "cpu%ld", cpu);
  9409		d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
  9410		if (!d_cpu) {
  9411			pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
  9412			return;
  9413		}
  9414	
  9415		/* per cpu trace_pipe */
  9416		trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
  9417					tr, cpu, &tracing_pipe_fops);
  9418	
  9419		/* per cpu trace */
  9420		trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
  9421					tr, cpu, &tracing_fops);
  9422	
  9423		trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
  9424					tr, cpu, &tracing_buffers_fops);
  9425	
  9426		trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
  9427					tr, cpu, &tracing_stats_fops);
  9428	
  9429		trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
  9430					tr, cpu, &tracing_entries_fops);
  9431	
  9432		if (tr->range_addr_start)
  9433			trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
  9434					      tr, cpu, &tracing_buffer_meta_fops);
  9435	#ifdef CONFIG_TRACER_SNAPSHOT
  9436		if (!tr->range_addr_start) {
  9437			trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
  9438					      tr, cpu, &snapshot_fops);
  9439	
  9440			trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
  9441					      tr, cpu, &snapshot_raw_fops);
  9442		}
  9443	#endif
  9444	}
  9445	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki