[PATCH 6/7] tracing: Allow syscall trace events to read more than one user parameter

Steven Rostedt posted 7 patches 2 months ago
There is a newer version of this series
[PATCH 6/7] tracing: Allow syscall trace events to read more than one user parameter
Posted by Steven Rostedt 2 months ago
From: Steven Rostedt <rostedt@goodmis.org>

Allow more than one field of a syscall trace event to read user space.
Build on top of the user_mask by allowing more than one bit to be set that
corresponds to the @args array of the syscall metadata. For each argument
in the @args array that is to be read, it will have a dynamic array/string
field associated to it.

Note that reading multiple fields from user space is not supported if
the user_arg_size field is set in the syscall metadata. That field can only
be used if only one field is being read from user space, as that field is a
number identifying which argument of the syscall event holds the size of
the data to read from user space. It becomes ambiguous if the system call
reads more than one field. Currently this is not an issue.

If a syscall event happens to enable more than one field to be read from
user space and also sets the user_arg_size field, it will trigger a warning
at boot and the user_arg_size field will be cleared.

The per CPU buffer that is used to read the user space addresses is now
broken up into 3 sections, each of 168 bytes. The reason for 168 is that
it is the biggest portion of 512 bytes divided by 3 that is 8 byte aligned.

The max amount copied into the ring buffer from user space is now only 128
bytes, which is plenty. When reading user space, it still reads up to 167
(168-1) bytes and uses whether more than 128 bytes were read to know if it
should append the extra "..." to the end or not.

This will allow the event to look like this:

  sys_renameat2(olddfd: 0xffffff9c, oldname: 0x7ffe02facdff "/tmp/x", newdfd: 0xffffff9c, newname: 0x7ffe02face06 "/tmp/y", flags: 1)

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_syscalls.c | 312 ++++++++++++++++++++++------------
 1 file changed, 207 insertions(+), 105 deletions(-)

diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8c0142eea898..b39fa9dd1067 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -139,6 +139,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
 	struct syscall_metadata *entry;
 	int i, syscall, val, len;
 	unsigned char *ptr;
+	int offset = 0;
 
 	trace = (typeof(trace))ent;
 	syscall = trace->nr;
@@ -178,11 +179,12 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
 			continue;
 
 		/* This arg points to a user space string */
-		ptr = (void *)trace->args + sizeof(long) * entry->nb_args;
+		ptr = (void *)trace->args + sizeof(long) * entry->nb_args + offset;
 		val = *(int *)ptr;
 
 		ptr = (void *)ent + (val & 0xffff);
 		len = val >> 16;
+		offset += 4;
 
 		if (entry->user_arg_size < 0 || entry->user_arg_is_str) {
 			trace_seq_printf(s, " \"%.*s\"", len, ptr);
@@ -335,7 +337,6 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
 	unsigned long mask;
 	char *arg;
 	int offset = offsetof(typeof(trace), args);
-	int idx;
 	int ret = 0;
 	int len;
 	int i;
@@ -354,27 +355,35 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
 		return ret;
 
 	mask = meta->user_mask;
-	idx = ffs(mask) - 1;
 
-	/*
-	 * User space data is faulted into a temporary buffer and then
-	 * added as a dynamic string or array to the end of the event.
-	 * The user space data name for the arg pointer is "__<arg>_val".
-	 */
-	len = strlen(meta->args[idx]) + sizeof("___val");
-	arg = kmalloc(len, GFP_KERNEL);
-	if (WARN_ON_ONCE(!arg)) {
-		meta->user_mask = 0;
-		return -ENOMEM;
-	}
+	while (mask) {
+		int idx = ffs(mask) - 1;
+		mask &= ~BIT(idx);
+
+		/*
+		 * User space data is faulted into a temporary buffer and then
+		 * added as a dynamic string or array to the end of the event.
+		 * The user space data name for the arg pointer is
+		 * "__<arg>_val".
+		 */
+		len = strlen(meta->args[idx]) + sizeof("___val");
+		arg = kmalloc(len, GFP_KERNEL);
+		if (WARN_ON_ONCE(!arg)) {
+			meta->user_mask = 0;
+			return -ENOMEM;
+		}
 
-	snprintf(arg, len, "__%s_val", meta->args[idx]);
+		snprintf(arg, len, "__%s_val", meta->args[idx]);
 
-	ret = trace_define_field(call, "__data_loc char[]",
-				 arg, offset, sizeof(int), 0,
-				 FILTER_OTHER);
-	if (ret)
-		kfree(arg);
+		ret = trace_define_field(call, "__data_loc char[]",
+					 arg, offset, sizeof(int), 0,
+					 FILTER_OTHER);
+		if (ret) {
+			kfree(arg);
+			break;
+		}
+		offset += 4;
+	}
 	return ret;
 }
 
@@ -387,8 +396,25 @@ struct syscall_buf_info {
 	struct syscall_buf __percpu	*sbuf;
 };
 
-/* Create a per CPU temporary buffer to copy user space pointers into */
+/*
+ * Create a per CPU temporary buffer to copy user space pointers into.
+ *
+ * SYSCALL_FAULT_BUF_SZ holds the size of the per CPU buffer to use
+ * to copy memory from user space addresses into.
+ *
+ * SYSCALL_FAULT_ARG_SZ is the amount to copy from user space.
+ *
+ * SYSCALL_FAULT_USER_MAX is the amount to copy into the ring buffer.
+ *  It's slightly smaller than SYSCALL_FAULT_ARG_SZ to know if it
+ *  needs to append the EXTRA or not.
+ *
+ * This only allows up to 3 args from system calls.
+ */
 #define SYSCALL_FAULT_BUF_SZ 512
+#define SYSCALL_FAULT_ARG_SZ 168
+#define SYSCALL_FAULT_USER_MAX 128
+#define SYSCALL_FAULT_MAX_CNT 3
+
 static struct syscall_buf_info *syscall_buffer;
 static DEFINE_PER_CPU(unsigned long, sched_switch_cnt);
 
@@ -498,22 +524,57 @@ static void syscall_fault_buffer_disable(void)
 	call_rcu_tasks_trace(&sinfo->rcu, rcu_free_syscall_buffer);
 }
 
-static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_buf_info *sinfo,
-			    unsigned long *args, unsigned int *data_size)
+static char *sys_fault_user(struct syscall_metadata *sys_data,
+			    struct syscall_buf_info *sinfo,
+			    unsigned long *args,
+			    unsigned int data_size[SYSCALL_FAULT_MAX_CNT])
 {
-	char *buf = per_cpu_ptr(sinfo->sbuf, smp_processor_id())->buf;
-	unsigned long size = SYSCALL_FAULT_BUF_SZ - 1;
+	char *buffer = per_cpu_ptr(sinfo->sbuf, smp_processor_id())->buf;
 	unsigned long mask = sys_data->user_mask;
+	unsigned long size = SYSCALL_FAULT_ARG_SZ - 1;
 	unsigned int cnt;
-	int idx = ffs(mask) - 1;
 	bool array = false;
-	char *ptr;
+	char *ptr_array[SYSCALL_FAULT_MAX_CNT];
+	char *buf;
+	int read[SYSCALL_FAULT_MAX_CNT];
 	int trys = 0;
+	int uargs;
 	int ret;
+	int i = 0;
+
+	/* The extra is appended to the user data in the buffer */
+	BUILD_BUG_ON(SYSCALL_FAULT_USER_MAX + sizeof(EXTRA) >=
+		     SYSCALL_FAULT_ARG_SZ);
+
+	/*
+	 * If this system call event has a size argument, use
+	 * it to define how much of user space memory to read,
+	 * and read it as an array and not a string.
+	 */
+	if (sys_data->user_arg_size >= 0) {
+		array = true;
+		size = args[sys_data->user_arg_size];
+		if (size > SYSCALL_FAULT_ARG_SZ - 1)
+			size = SYSCALL_FAULT_ARG_SZ - 1;
+	}
+
+	while (mask) {
+		int idx = ffs(mask) - 1;
+		mask &= ~BIT(idx);
+
+		if (WARN_ON_ONCE(i == SYSCALL_FAULT_MAX_CNT))
+			break;
+
+		/* Get the pointer to user space memory to read */
+		ptr_array[i++] = (char *)args[idx];
+	}
 
-	/* Get the pointer to user space memory to read */
-	ptr = (char *)args[idx];
-	*data_size = 0;
+	uargs = i;
+
+	/* Clear the values that are not used */
+	for (; i < SYSCALL_FAULT_MAX_CNT; i++) {
+		data_size[i] = -1; /* Denotes no pointer */
+	}
 
  again:
 	/*
@@ -532,24 +593,12 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
 		if (!once)
 			pr_warn("Error: Too many tries to read syscall %s\n", sys_data->name);
 		once = true;
-		return buf;
+		return buffer;
 	}
 
 	/* Read the current sched switch count */
 	cnt = this_cpu_read(sched_switch_cnt);
 
-	/*
-	 * If this system call event has a size argument, use
-	 * it to define how much of user space memory to read,
-	 * and read it as an array and not a string.
-	 */
-	if (sys_data->user_arg_size >= 0) {
-		array = true;
-		size = args[sys_data->user_arg_size];
-		if (size > SYSCALL_FAULT_BUF_SZ - 1)
-			size = SYSCALL_FAULT_BUF_SZ - 1;
-	}
-
 	/*
 	 * Preemption is going to be enabled, but this task must
 	 * remain on this CPU.
@@ -562,20 +611,23 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
 	 */
 	preempt_enable_notrace();
 
-	if (array) {
-		ret = __copy_from_user(buf, ptr, size);
-		ret = ret ? -1 : size;
-	} else {
-		ret = strncpy_from_user(buf, ptr, size);
+	buf = buffer;
+
+	for (i = 0; i < uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
+		char *ptr = ptr_array[i];
+
+		if (array) {
+			ret = __copy_from_user(buf, ptr, size);
+			ret = ret ? -1 : size;
+		} else {
+			ret = strncpy_from_user(buf, ptr, size);
+		}
+		read[i] = ret;
 	}
 
 	preempt_disable_notrace();
 	migrate_enable();
 
-	/* If it faulted, no use to try again */
-	if (ret < 0)
-		return buf;
-
 	/*
 	 * Preemption is disabled again, now check the sched_switch_cnt.
 	 * If it increased by two or more, then another user space process
@@ -592,28 +644,39 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
 	if (this_cpu_read(sched_switch_cnt) > cnt + 1)
 		goto again;
 
-	/* For strings, replace any non-printable characters with '.' */
-	if (!array) {
-		for (int i = 0; i < ret; i++) {
-			if (!isprint(buf[i]))
-				buf[i] = '.';
-		}
+	buf = buffer;
+	for (i = 0; i < uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
 
-		/*
-		 * If the text was truncated due to our max limit, add "..." to
-		 * the string.
-		 */
-		if (ret > SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA)) {
-			strscpy(buf + SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA),
-				EXTRA, sizeof(EXTRA));
-			ret = SYSCALL_FAULT_BUF_SZ;
+		ret = read[i];
+		if (ret < 0)
+			continue;
+		buf[ret] = '\0';
+
+		/* For strings, replace any non-printable characters with '.' */
+		if (!array) {
+			for (int x = 0; x < ret; x++) {
+				if (!isprint(buf[x]))
+					buf[x] = '.';
+			}
+
+			/*
+			 * If the text was truncated due to our max limit,
+			 * add "..." to the string.
+			 */
+			if (ret > SYSCALL_FAULT_USER_MAX) {
+				strscpy(buf + SYSCALL_FAULT_USER_MAX, EXTRA,
+					sizeof(EXTRA));
+				ret = SYSCALL_FAULT_USER_MAX + sizeof(EXTRA);
+			} else {
+				buf[ret++] = '\0';
+			}
 		} else {
-			buf[ret++] = '\0';
+			ret = min(ret, SYSCALL_FAULT_USER_MAX);
 		}
+		data_size[i] = ret;
 	}
 
-	*data_size = ret;
-	return buf;
+	return buffer;
 }
 
 static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
@@ -625,9 +688,10 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 	struct trace_event_buffer fbuffer;
 	unsigned long args[6];
 	char *user_ptr;
-	int user_size = 0;
+	int user_sizes[SYSCALL_FAULT_MAX_CNT] = {};
 	int syscall_nr;
 	int size = 0;
+	int uargs = 0;
 	bool mayfault;
 
 	/*
@@ -660,20 +724,27 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 
 	if (mayfault) {
 		struct syscall_buf_info *sinfo;
+		int i;
 
 		/* If the syscall_buffer is NULL, tracing is being shutdown */
 		sinfo = READ_ONCE(syscall_buffer);
 		if (!sinfo)
 			return;
 
-		user_ptr = sys_fault_user(sys_data, sinfo, args, &user_size);
+		user_ptr = sys_fault_user(sys_data, sinfo, args, user_sizes);
 		/*
 		 * user_size is the amount of data to append.
 		 * Need to add 4 for the meta field that points to
 		 * the user memory at the end of the event and also
 		 * stores its size.
 		 */
-		size = 4 + user_size;
+		for (i = 0; i < SYSCALL_FAULT_MAX_CNT; i++) {
+			if (user_sizes[i] < 0)
+				break;
+			size += user_sizes[i] + 4;
+		}
+		/* Save the number of user read arguments of this syscall */
+		uargs = i;
 	}
 
 	size += sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
@@ -688,6 +759,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 	memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
 
 	if (mayfault) {
+		char *buf = user_ptr;
 		void *ptr;
 		int val;
 
@@ -699,21 +771,30 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 
 		/*
 		 * The meta data will store the offset of the user data from
-		 * the beginning of the event.
+		 * the beginning of the event. That is after the static arguments
+		 * and the meta data fields.
 		 */
-		val  = (ptr - (void *)entry) + 4;
+		val = (ptr - (void *)entry) + 4 * uargs;
+
+		for (int i = 0; i < uargs; i++) {
 
-		/* Store the offset and the size into the meta data */
-		*(int *)ptr = val | (user_size << 16);
+			if (i)
+				val += user_sizes[i - 1];
 
-		if (WARN_ON_ONCE((ptr - (void *)entry + user_size) > size))
-			user_size = 0;
+			/* Store the offset and the size into the meta data */
+			*(int *)ptr = val | (user_sizes[i] << 16);
 
-		/* Nothing to do if the user space was empty or faulted */
-		if (user_size) {
-			/* Now store the user space data into the event */
+			/* Skip the meta data */
 			ptr += 4;
-			memcpy(ptr, user_ptr, user_size);
+		}
+
+		for (int i = 0; i < uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
+			/* Nothing to do if the user space was empty or faulted */
+			if (!user_sizes[i])
+				continue;
+
+			memcpy(ptr, buf, user_sizes[i]);
+			ptr += user_sizes[i];
 		}
 	}
 
@@ -857,6 +938,7 @@ static void unreg_event_syscall_exit(struct trace_event_file *file,
 static void check_faultable_syscall(struct trace_event_call *call, int nr)
 {
 	struct syscall_metadata *sys_data = call->data;
+	unsigned long mask;
 
 	/* Only work on entry */
 	if (sys_data->enter_event != call)
@@ -888,7 +970,6 @@ static void check_faultable_syscall(struct trace_event_call *call, int nr)
 	/* user arg at position 0 */
 	case __NR_access:
 	case __NR_acct:
-	case __NR_add_key: /* Just _type. TODO add _description */
 	case __NR_chdir:
 	case __NR_chown:
 	case __NR_chmod:
@@ -897,28 +978,15 @@ static void check_faultable_syscall(struct trace_event_call *call, int nr)
 	case __NR_delete_module:
 	case __NR_execve:
 	case __NR_fsopen:
-	case __NR_getxattr: /* Just pathname, TODO add name */
 	case __NR_lchown:
-	case __NR_lgetxattr: /* Just pathname, TODO add name */
-	case __NR_lremovexattr: /* Just pathname, TODO add name */
-	case __NR_link: /* Just oldname. TODO add newname */
-	case __NR_listxattr: /* Just pathname, TODO add list */
-	case __NR_llistxattr: /* Just pathname, TODO add list */
-	case __NR_lsetxattr: /* Just pathname, TODO add list */
 	case __NR_open:
 	case __NR_memfd_create:
-	case __NR_mount: /* Just dev_name, TODO add dir_name and type */
 	case __NR_mkdir:
 	case __NR_mknod:
 	case __NR_mq_open:
 	case __NR_mq_unlink:
-	case __NR_pivot_root: /* Just new_root, TODO add old_root */
 	case __NR_readlink:
-	case __NR_removexattr: /* Just pathname, TODO add name */
-	case __NR_rename: /* Just oldname. TODO add newname */
-	case __NR_request_key: /* Just _type. TODO add _description */
 	case __NR_rmdir:
-	case __NR_setxattr: /* Just pathname, TODO add list */
 	case __NR_shmdt:
 	case __NR_statfs:
 	case __NR_swapon:
@@ -945,14 +1013,10 @@ static void check_faultable_syscall(struct trace_event_call *call, int nr)
 	case __NR_fspick:
 	case __NR_fremovexattr:
 	case __NR_futimesat:
-	case __NR_getxattrat: /* Just pathname, TODO add name */
 	case __NR_inotify_add_watch:
-	case __NR_linkat: /* Just oldname. TODO add newname */
-	case __NR_listxattrat: /* Just pathname, TODO add list */
 	case __NR_mkdirat:
 	case __NR_mknodat:
 	case __NR_mount_setattr:
-	case __NR_move_mount: /* Just from_pathname, TODO add to_pathname */
 	case __NR_name_to_handle_at:
 	case __NR_newfstatat:
 	case __NR_openat:
@@ -960,13 +1024,8 @@ static void check_faultable_syscall(struct trace_event_call *call, int nr)
 	case __NR_open_tree:
 	case __NR_open_tree_attr:
 	case __NR_readlinkat:
-	case __NR_renameat: /* Just oldname. TODO add newname */
-	case __NR_renameat2: /* Just oldname. TODO add newname */
-	case __NR_removexattrat: /* Just pathname, TODO add name */
 	case __NR_quotactl:
-	case __NR_setxattrat: /* Just pathname, TODO add list */
 	case __NR_syslog:
-	case __NR_symlinkat: /* Just oldname. TODO add newname */
 	case __NR_statx:
 	case __NR_unlinkat:
 	case __NR_utimensat:
@@ -981,9 +1040,52 @@ static void check_faultable_syscall(struct trace_event_call *call, int nr)
 	case __NR_fanotify_mark:
 		sys_data->user_mask = BIT(4);
 		break;
+	/* 2 user args, 0 and 1 */
+	case __NR_add_key:
+	case __NR_getxattr:
+	case __NR_lgetxattr:
+	case __NR_lremovexattr:
+	case __NR_link:
+	case __NR_listxattr:
+	case __NR_llistxattr:
+	case __NR_lsetxattr:
+	case __NR_pivot_root:
+	case __NR_removexattr:
+	case __NR_rename:
+	case __NR_request_key:
+	case __NR_setxattr:
+	case __NR_symlinkat:
+		sys_data->user_mask = BIT(0) | BIT(1);
+		break;
+	/* 2 user args, 1 and 3 */
+	case __NR_getxattrat:
+	case __NR_linkat:
+	case __NR_listxattrat:
+	case __NR_move_mount:
+	case __NR_renameat:
+	case __NR_renameat2:
+	case __NR_removexattrat:
+	case __NR_setxattrat:
+		sys_data->user_mask = BIT(1) | BIT(3);
+		break;
+	case __NR_mount: /* dev_name, dir_name and type */
+		sys_data->user_mask = BIT(0) | BIT(1) | BIT(2);
+		break;
 	default:
 		sys_data->user_mask = 0;
+		return;
 	}
+
+	if (sys_data->user_arg_size < 0)
+		return;
+
+	/*
+	 * The user_arg_size can only be used when the system call
+	 * is reading only a single address from user space.
+	 */
+	mask = sys_data->user_mask;
+	if (WARN_ON(mask & (mask - 1)))
+		sys_data->user_arg_size = -1;
 }
 
 static int __init init_syscall_trace(struct trace_event_call *call)
-- 
2.47.2
Re: [PATCH 6/7] tracing: Allow syscall trace events to read more than one user parameter
Posted by kernel test robot 1 month, 4 weeks ago
Hi Steven,

kernel test robot noticed the following build warnings:

[auto build test WARNING on trace/for-next]
[also build test WARNING on linus/master v6.16 next-20250806]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Steven-Rostedt/tracing-Replace-syscall-RCU-pointer-assignment-with-READ-WRITE_ONCE/20250806-122312
base:   https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace for-next
patch link:    https://lore.kernel.org/r/20250805193235.582013098%40kernel.org
patch subject: [PATCH 6/7] tracing: Allow syscall trace events to read more than one user parameter
config: x86_64-randconfig-123-20250806 (https://download.01.org/0day-ci/archive/20250807/202508070706.TiTQY0Ne-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250807/202508070706.TiTQY0Ne-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202508070706.TiTQY0Ne-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> kernel/trace/trace_syscalls.c:620:53: sparse: sparse: incorrect type in argument 2 (different address spaces) @@     expected void const [noderef] __user *from @@     got char *ptr @@
   kernel/trace/trace_syscalls.c:620:53: sparse:     expected void const [noderef] __user *from
   kernel/trace/trace_syscalls.c:620:53: sparse:     got char *ptr
>> kernel/trace/trace_syscalls.c:623:54: sparse: sparse: incorrect type in argument 2 (different address spaces) @@     expected char const [noderef] __user *src @@     got char *ptr @@
   kernel/trace/trace_syscalls.c:623:54: sparse:     expected char const [noderef] __user *src
   kernel/trace/trace_syscalls.c:623:54: sparse:     got char *ptr
   kernel/trace/trace_syscalls.c:707:20: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct trace_event_file *trace_file @@     got struct trace_event_file [noderef] __rcu * @@
   kernel/trace/trace_syscalls.c:707:20: sparse:     expected struct trace_event_file *trace_file
   kernel/trace/trace_syscalls.c:707:20: sparse:     got struct trace_event_file [noderef] __rcu *
   kernel/trace/trace_syscalls.c:824:20: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct trace_event_file *trace_file @@     got struct trace_event_file [noderef] __rcu * @@
   kernel/trace/trace_syscalls.c:824:20: sparse:     expected struct trace_event_file *trace_file
   kernel/trace/trace_syscalls.c:824:20: sparse:     got struct trace_event_file [noderef] __rcu *
   kernel/trace/trace_syscalls.c:871:9: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct trace_event_file [noderef] __rcu *volatile @@     got struct trace_event_file *file @@
   kernel/trace/trace_syscalls.c:871:9: sparse:     expected struct trace_event_file [noderef] __rcu *volatile
   kernel/trace/trace_syscalls.c:871:9: sparse:     got struct trace_event_file *file
   kernel/trace/trace_syscalls.c:909:17: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct trace_event_file [noderef] __rcu *volatile @@     got struct trace_event_file *file @@
   kernel/trace/trace_syscalls.c:909:17: sparse:     expected struct trace_event_file [noderef] __rcu *volatile
   kernel/trace/trace_syscalls.c:909:17: sparse:     got struct trace_event_file *file

vim +620 kernel/trace/trace_syscalls.c

6bc850d6f8f730 Steven Rostedt 2025-08-05  526  
623bd9e046f95c Steven Rostedt 2025-08-05  527  static char *sys_fault_user(struct syscall_metadata *sys_data,
623bd9e046f95c Steven Rostedt 2025-08-05  528  			    struct syscall_buf_info *sinfo,
623bd9e046f95c Steven Rostedt 2025-08-05  529  			    unsigned long *args,
623bd9e046f95c Steven Rostedt 2025-08-05  530  			    unsigned int data_size[SYSCALL_FAULT_MAX_CNT])
6bc850d6f8f730 Steven Rostedt 2025-08-05  531  {
623bd9e046f95c Steven Rostedt 2025-08-05  532  	char *buffer = per_cpu_ptr(sinfo->sbuf, smp_processor_id())->buf;
6bc850d6f8f730 Steven Rostedt 2025-08-05  533  	unsigned long mask = sys_data->user_mask;
623bd9e046f95c Steven Rostedt 2025-08-05  534  	unsigned long size = SYSCALL_FAULT_ARG_SZ - 1;
6bc850d6f8f730 Steven Rostedt 2025-08-05  535  	unsigned int cnt;
b979d33ec48bbd Steven Rostedt 2025-08-05  536  	bool array = false;
623bd9e046f95c Steven Rostedt 2025-08-05  537  	char *ptr_array[SYSCALL_FAULT_MAX_CNT];
623bd9e046f95c Steven Rostedt 2025-08-05  538  	char *buf;
623bd9e046f95c Steven Rostedt 2025-08-05  539  	int read[SYSCALL_FAULT_MAX_CNT];
6bc850d6f8f730 Steven Rostedt 2025-08-05  540  	int trys = 0;
623bd9e046f95c Steven Rostedt 2025-08-05  541  	int uargs;
6bc850d6f8f730 Steven Rostedt 2025-08-05  542  	int ret;
623bd9e046f95c Steven Rostedt 2025-08-05  543  	int i = 0;
623bd9e046f95c Steven Rostedt 2025-08-05  544  
623bd9e046f95c Steven Rostedt 2025-08-05  545  	/* The extra is appended to the user data in the buffer */
623bd9e046f95c Steven Rostedt 2025-08-05  546  	BUILD_BUG_ON(SYSCALL_FAULT_USER_MAX + sizeof(EXTRA) >=
623bd9e046f95c Steven Rostedt 2025-08-05  547  		     SYSCALL_FAULT_ARG_SZ);
623bd9e046f95c Steven Rostedt 2025-08-05  548  
623bd9e046f95c Steven Rostedt 2025-08-05  549  	/*
623bd9e046f95c Steven Rostedt 2025-08-05  550  	 * If this system call event has a size argument, use
623bd9e046f95c Steven Rostedt 2025-08-05  551  	 * it to define how much of user space memory to read,
623bd9e046f95c Steven Rostedt 2025-08-05  552  	 * and read it as an array and not a string.
623bd9e046f95c Steven Rostedt 2025-08-05  553  	 */
623bd9e046f95c Steven Rostedt 2025-08-05  554  	if (sys_data->user_arg_size >= 0) {
623bd9e046f95c Steven Rostedt 2025-08-05  555  		array = true;
623bd9e046f95c Steven Rostedt 2025-08-05  556  		size = args[sys_data->user_arg_size];
623bd9e046f95c Steven Rostedt 2025-08-05  557  		if (size > SYSCALL_FAULT_ARG_SZ - 1)
623bd9e046f95c Steven Rostedt 2025-08-05  558  			size = SYSCALL_FAULT_ARG_SZ - 1;
623bd9e046f95c Steven Rostedt 2025-08-05  559  	}
623bd9e046f95c Steven Rostedt 2025-08-05  560  
623bd9e046f95c Steven Rostedt 2025-08-05  561  	while (mask) {
623bd9e046f95c Steven Rostedt 2025-08-05  562  		int idx = ffs(mask) - 1;
623bd9e046f95c Steven Rostedt 2025-08-05  563  		mask &= ~BIT(idx);
623bd9e046f95c Steven Rostedt 2025-08-05  564  
623bd9e046f95c Steven Rostedt 2025-08-05  565  		if (WARN_ON_ONCE(i == SYSCALL_FAULT_MAX_CNT))
623bd9e046f95c Steven Rostedt 2025-08-05  566  			break;
6bc850d6f8f730 Steven Rostedt 2025-08-05  567  
6bc850d6f8f730 Steven Rostedt 2025-08-05  568  		/* Get the pointer to user space memory to read */
623bd9e046f95c Steven Rostedt 2025-08-05  569  		ptr_array[i++] = (char *)args[idx];
623bd9e046f95c Steven Rostedt 2025-08-05  570  	}
623bd9e046f95c Steven Rostedt 2025-08-05  571  
623bd9e046f95c Steven Rostedt 2025-08-05  572  	uargs = i;
623bd9e046f95c Steven Rostedt 2025-08-05  573  
623bd9e046f95c Steven Rostedt 2025-08-05  574  	/* Clear the values that are not used */
623bd9e046f95c Steven Rostedt 2025-08-05  575  	for (; i < SYSCALL_FAULT_MAX_CNT; i++) {
623bd9e046f95c Steven Rostedt 2025-08-05  576  		data_size[i] = -1; /* Denotes no pointer */
623bd9e046f95c Steven Rostedt 2025-08-05  577  	}
6bc850d6f8f730 Steven Rostedt 2025-08-05  578  
6bc850d6f8f730 Steven Rostedt 2025-08-05  579   again:
6bc850d6f8f730 Steven Rostedt 2025-08-05  580  	/*
6bc850d6f8f730 Steven Rostedt 2025-08-05  581  	 * If this task is preempted by another user space task, it
6bc850d6f8f730 Steven Rostedt 2025-08-05  582  	 * will cause this task to try again. But just in case something
6bc850d6f8f730 Steven Rostedt 2025-08-05  583  	 * changes where the copying from user space causes another task
6bc850d6f8f730 Steven Rostedt 2025-08-05  584  	 * to run, prevent this from going into an infinite loop.
6bc850d6f8f730 Steven Rostedt 2025-08-05  585  	 * 10 tries should be plenty.
6bc850d6f8f730 Steven Rostedt 2025-08-05  586  	 */
6bc850d6f8f730 Steven Rostedt 2025-08-05  587  	if (trys++ > 10) {
6bc850d6f8f730 Steven Rostedt 2025-08-05  588  		static bool once;
6bc850d6f8f730 Steven Rostedt 2025-08-05  589  		/*
6bc850d6f8f730 Steven Rostedt 2025-08-05  590  		 * Only print a message instead of a WARN_ON() as this could
6bc850d6f8f730 Steven Rostedt 2025-08-05  591  		 * theoretically trigger under real load.
6bc850d6f8f730 Steven Rostedt 2025-08-05  592  		 */
6bc850d6f8f730 Steven Rostedt 2025-08-05  593  		if (!once)
6bc850d6f8f730 Steven Rostedt 2025-08-05  594  			pr_warn("Error: Too many tries to read syscall %s\n", sys_data->name);
6bc850d6f8f730 Steven Rostedt 2025-08-05  595  		once = true;
623bd9e046f95c Steven Rostedt 2025-08-05  596  		return buffer;
6bc850d6f8f730 Steven Rostedt 2025-08-05  597  	}
6bc850d6f8f730 Steven Rostedt 2025-08-05  598  
6bc850d6f8f730 Steven Rostedt 2025-08-05  599  	/* Read the current sched switch count */
6bc850d6f8f730 Steven Rostedt 2025-08-05  600  	cnt = this_cpu_read(sched_switch_cnt);
6bc850d6f8f730 Steven Rostedt 2025-08-05  601  
6bc850d6f8f730 Steven Rostedt 2025-08-05  602  	/*
6bc850d6f8f730 Steven Rostedt 2025-08-05  603  	 * Preemption is going to be enabled, but this task must
6bc850d6f8f730 Steven Rostedt 2025-08-05  604  	 * remain on this CPU.
6bc850d6f8f730 Steven Rostedt 2025-08-05  605  	 */
6bc850d6f8f730 Steven Rostedt 2025-08-05  606  	migrate_disable();
6bc850d6f8f730 Steven Rostedt 2025-08-05  607  
6bc850d6f8f730 Steven Rostedt 2025-08-05  608  	/*
6bc850d6f8f730 Steven Rostedt 2025-08-05  609  	 * Now preemption is being enabed and another task can come in
6bc850d6f8f730 Steven Rostedt 2025-08-05  610  	 * and use the same buffer and corrupt our data.
6bc850d6f8f730 Steven Rostedt 2025-08-05  611  	 */
6bc850d6f8f730 Steven Rostedt 2025-08-05  612  	preempt_enable_notrace();
6bc850d6f8f730 Steven Rostedt 2025-08-05  613  
623bd9e046f95c Steven Rostedt 2025-08-05  614  	buf = buffer;
623bd9e046f95c Steven Rostedt 2025-08-05  615  
623bd9e046f95c Steven Rostedt 2025-08-05  616  	for (i = 0; i < uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
623bd9e046f95c Steven Rostedt 2025-08-05  617  		char *ptr = ptr_array[i];
623bd9e046f95c Steven Rostedt 2025-08-05  618  
b979d33ec48bbd Steven Rostedt 2025-08-05  619  		if (array) {
b979d33ec48bbd Steven Rostedt 2025-08-05 @620  			ret = __copy_from_user(buf, ptr, size);
b979d33ec48bbd Steven Rostedt 2025-08-05  621  			ret = ret ? -1 : size;
b979d33ec48bbd Steven Rostedt 2025-08-05  622  		} else {
6bc850d6f8f730 Steven Rostedt 2025-08-05 @623  			ret = strncpy_from_user(buf, ptr, size);
b979d33ec48bbd Steven Rostedt 2025-08-05  624  		}
623bd9e046f95c Steven Rostedt 2025-08-05  625  		read[i] = ret;
623bd9e046f95c Steven Rostedt 2025-08-05  626  	}
6bc850d6f8f730 Steven Rostedt 2025-08-05  627  
6bc850d6f8f730 Steven Rostedt 2025-08-05  628  	preempt_disable_notrace();
6bc850d6f8f730 Steven Rostedt 2025-08-05  629  	migrate_enable();
6bc850d6f8f730 Steven Rostedt 2025-08-05  630  
6bc850d6f8f730 Steven Rostedt 2025-08-05  631  	/*
6bc850d6f8f730 Steven Rostedt 2025-08-05  632  	 * Preemption is disabled again, now check the sched_switch_cnt.
6bc850d6f8f730 Steven Rostedt 2025-08-05  633  	 * If it increased by two or more, then another user space process
6bc850d6f8f730 Steven Rostedt 2025-08-05  634  	 * may have schedule in and corrupted our buffer. In that case
6bc850d6f8f730 Steven Rostedt 2025-08-05  635  	 * the copying must be retried.
6bc850d6f8f730 Steven Rostedt 2025-08-05  636  	 *
6bc850d6f8f730 Steven Rostedt 2025-08-05  637  	 * Note, if this task was scheduled out and only kernel threads
6bc850d6f8f730 Steven Rostedt 2025-08-05  638  	 * were scheduled in (maybe to process the fault), then the
6bc850d6f8f730 Steven Rostedt 2025-08-05  639  	 * counter would increment again when this task scheduled in.
6bc850d6f8f730 Steven Rostedt 2025-08-05  640  	 * If this task scheduled out and another user task scheduled
6bc850d6f8f730 Steven Rostedt 2025-08-05  641  	 * in, this task would still need to be scheduled back in and
6bc850d6f8f730 Steven Rostedt 2025-08-05  642  	 * the counter would increment by at least two.
6bc850d6f8f730 Steven Rostedt 2025-08-05  643  	 */
6bc850d6f8f730 Steven Rostedt 2025-08-05  644  	if (this_cpu_read(sched_switch_cnt) > cnt + 1)
6bc850d6f8f730 Steven Rostedt 2025-08-05  645  		goto again;
6bc850d6f8f730 Steven Rostedt 2025-08-05  646  
623bd9e046f95c Steven Rostedt 2025-08-05  647  	buf = buffer;
623bd9e046f95c Steven Rostedt 2025-08-05  648  	for (i = 0; i < uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
623bd9e046f95c Steven Rostedt 2025-08-05  649  
623bd9e046f95c Steven Rostedt 2025-08-05  650  		ret = read[i];
623bd9e046f95c Steven Rostedt 2025-08-05  651  		if (ret < 0)
623bd9e046f95c Steven Rostedt 2025-08-05  652  			continue;
623bd9e046f95c Steven Rostedt 2025-08-05  653  		buf[ret] = '\0';
623bd9e046f95c Steven Rostedt 2025-08-05  654  
b979d33ec48bbd Steven Rostedt 2025-08-05  655  		/* For strings, replace any non-printable characters with '.' */
b979d33ec48bbd Steven Rostedt 2025-08-05  656  		if (!array) {
623bd9e046f95c Steven Rostedt 2025-08-05  657  			for (int x = 0; x < ret; x++) {
623bd9e046f95c Steven Rostedt 2025-08-05  658  				if (!isprint(buf[x]))
623bd9e046f95c Steven Rostedt 2025-08-05  659  					buf[x] = '.';
6bc850d6f8f730 Steven Rostedt 2025-08-05  660  			}
6bc850d6f8f730 Steven Rostedt 2025-08-05  661  
6bc850d6f8f730 Steven Rostedt 2025-08-05  662  			/*
623bd9e046f95c Steven Rostedt 2025-08-05  663  			 * If the text was truncated due to our max limit,
623bd9e046f95c Steven Rostedt 2025-08-05  664  			 * add "..." to the string.
6bc850d6f8f730 Steven Rostedt 2025-08-05  665  			 */
623bd9e046f95c Steven Rostedt 2025-08-05  666  			if (ret > SYSCALL_FAULT_USER_MAX) {
623bd9e046f95c Steven Rostedt 2025-08-05  667  				strscpy(buf + SYSCALL_FAULT_USER_MAX, EXTRA,
623bd9e046f95c Steven Rostedt 2025-08-05  668  					sizeof(EXTRA));
623bd9e046f95c Steven Rostedt 2025-08-05  669  				ret = SYSCALL_FAULT_USER_MAX + sizeof(EXTRA);
6bc850d6f8f730 Steven Rostedt 2025-08-05  670  			} else {
6bc850d6f8f730 Steven Rostedt 2025-08-05  671  				buf[ret++] = '\0';
6bc850d6f8f730 Steven Rostedt 2025-08-05  672  			}
623bd9e046f95c Steven Rostedt 2025-08-05  673  		} else {
623bd9e046f95c Steven Rostedt 2025-08-05  674  			ret = min(ret, SYSCALL_FAULT_USER_MAX);
623bd9e046f95c Steven Rostedt 2025-08-05  675  		}
623bd9e046f95c Steven Rostedt 2025-08-05  676  		data_size[i] = ret;
b979d33ec48bbd Steven Rostedt 2025-08-05  677  	}
6bc850d6f8f730 Steven Rostedt 2025-08-05  678  
623bd9e046f95c Steven Rostedt 2025-08-05  679  	return buffer;
6bc850d6f8f730 Steven Rostedt 2025-08-05  680  }
6bc850d6f8f730 Steven Rostedt 2025-08-05  681  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki