tools/perf/builtin-trace.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+)
Recent changes in the linux-next kernel add a new field to syscall
tracepoints that carries the contents of userspace memory, as shown below.
# cat /sys/kernel/tracing/events/syscalls/sys_enter_write/format
name: sys_enter_write
ID: 758
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:int __syscall_nr; offset:8; size:4; signed:1;
field:unsigned int fd; offset:16; size:8; signed:0;
field:const char * buf; offset:24; size:8; signed:0;
field:size_t count; offset:32; size:8; signed:0;
field:__data_loc char[] __buf_val; offset:40; size:4; signed:0;
print fmt: "fd: 0x%08lx, buf: 0x%08lx (%s), count: 0x%08lx", ((unsigned long)(REC->fd)),
((unsigned long)(REC->buf)), __print_dynamic_array(__buf_val, 1),
((unsigned long)(REC->count))
We have a different way to handle those arguments, and this change
confuses perf trace and makes some tests fail. Fix it by skipping
the new fields that have the "__data_loc char[]" type.
Maybe we can switch to this instead of the BPF augmentation later.
Reported-by: Thomas Richter <tmricht@linux.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Howard Chu <howardchu95@gmail.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/perf/builtin-trace.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a743bda294bd3400..baee1f6956001d86 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2069,6 +2069,15 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
}
+/*
+ * v6.19 kernel added new fields to read userspace memory for event tracing.
+ * But it's not used by perf and confuses the syscall parameters.
+ */
+static bool is_internal_field(struct tep_format_field *field)
+{
+ return !strcmp(field->type, "__data_loc char[]");
+}
+
static struct tep_format_field *
syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
bool *use_btf)
@@ -2077,6 +2086,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
int len;
for (; field; field = field->next, ++arg) {
+ /* assume it's the last argument */
+ if (is_internal_field(field))
+ continue;
+
last_field = field;
if (arg->scnprintf)
@@ -2145,6 +2158,7 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
{
char tp_name[128];
const char *name;
+ struct tep_format_field *field;
int err;
if (sc->nonexistent)
@@ -2201,6 +2215,13 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
--sc->nr_args;
}
+ field = sc->args;
+ while (field) {
+ if (is_internal_field(field))
+ --sc->nr_args;
+ field = field->next;
+ }
+
sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
--
2.52.0.487.g5c8c507ade-goog
On Wed, 26 Nov 2025 20:44:18 -0800
Namhyung Kim <namhyung@kernel.org> wrote:
> Recent changes in the linux-next kernel will add new field for syscalls
> to have contents in the userspace like below.
>
> # cat /sys/kernel/tracing/events/syscalls/sys_enter_write/format
> name: sys_enter_write
> ID: 758
> format:
> field:unsigned short common_type; offset:0; size:2; signed:0;
> field:unsigned char common_flags; offset:2; size:1; signed:0;
> field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
> field:int common_pid; offset:4; size:4; signed:1;
>
> field:int __syscall_nr; offset:8; size:4; signed:1;
> field:unsigned int fd; offset:16; size:8; signed:0;
> field:const char * buf; offset:24; size:8; signed:0;
> field:size_t count; offset:32; size:8; signed:0;
> field:__data_loc char[] __buf_val; offset:40; size:4; signed:0;
>
> print fmt: "fd: 0x%08lx, buf: 0x%08lx (%s), count: 0x%08lx", ((unsigned long)(REC->fd)),
> ((unsigned long)(REC->buf)), __print_dynamic_array(__buf_val, 1),
> ((unsigned long)(REC->count))
>
> We have a different way to handle those arguments and this change
> confuses perf trace then make some tests failing. Fix it by skipping
> the new fields that have "__data_loc char[]" type.
>
> Maybe we can switch to this instead of the BPF augmentation later.
>
Even with this patch applied, I still have a segfault with this command:
# ./perf trace -e syscalls:sys_enter_write
0.000 sshd-session/5421 syscalls:sys_enter_write(perf: Segmentation fault
#0 0x560ea815187a in dump_stack debug.c:366
#1 0x560ea81518f0 in sighandler_dump_stack debug.c:378
#2 0x7fb5e14d1df0 in __restore_rt libc_sigaction.c:0
#3 0x560ea7fc0cec in syscall_arg__scnprintf_buf builtin-trace.c:1857
#4 0x560ea7fc2692 in syscall_arg_fmt__scnprintf_val builtin-trace.c:2398
#5 0x560ea7fc2aad in syscall__scnprintf_args builtin-trace.c:2476
#6 0x560ea7fc3ea2 in trace__fprintf_sys_enter builtin-trace.c:2885
#7 0x560ea7fc57a5 in trace__event_handler builtin-trace.c:3312
#8 0x560ea7fc68dd in trace__handle_event builtin-trace.c:3649
#9 0x560ea7fc7f7a in __trace__deliver_event builtin-trace.c:4183
#10 0x560ea7fc80cc in trace__deliver_event builtin-trace.c:4209
#11 0x560ea7fc92b7 in trace__run builtin-trace.c:4577
#12 0x560ea7fcd648 in cmd_trace builtin-trace.c:5773
#13 0x560ea7fd35e3 in run_builtin perf.c:349
#14 0x560ea7fd387b in handle_internal_command perf.c:401
#15 0x560ea7fd39d4 in run_argv perf.c:448
#16 0x560ea7fd3d1d in main perf.c:555
#17 0x7fb5e14bbca8 in __libc_start_call_main libc_start_call_main.h:74
#18 0x7fb5e14bbd65 in __libc_start_main@@GLIBC_2.34 libc-start.c:128
#19 0x560ea7f25f41 in _start perf[53f41]
Segmentation fault
This doesn't crash in a kernel without the __data_loc.
-- Steve
On Thu, Nov 27, 2025 at 08:30:52PM -0500, Steven Rostedt wrote: > On Wed, 26 Nov 2025 20:44:18 -0800 > Namhyung Kim <namhyung@kernel.org> wrote: > > > Recent changes in the linux-next kernel will add new field for syscalls > > to have contents in the userspace like below. > > > > # cat /sys/kernel/tracing/events/syscalls/sys_enter_write/format > > name: sys_enter_write > > ID: 758 > > format: > > field:unsigned short common_type; offset:0; size:2; signed:0; > > field:unsigned char common_flags; offset:2; size:1; signed:0; > > field:unsigned char common_preempt_count; offset:3; size:1; signed:0; > > field:int common_pid; offset:4; size:4; signed:1; > > > > field:int __syscall_nr; offset:8; size:4; signed:1; > > field:unsigned int fd; offset:16; size:8; signed:0; > > field:const char * buf; offset:24; size:8; signed:0; > > field:size_t count; offset:32; size:8; signed:0; > > field:__data_loc char[] __buf_val; offset:40; size:4; signed:0; > > > > print fmt: "fd: 0x%08lx, buf: 0x%08lx (%s), count: 0x%08lx", ((unsigned long)(REC->fd)), > > ((unsigned long)(REC->buf)), __print_dynamic_array(__buf_val, 1), > > ((unsigned long)(REC->count)) > > > > We have a different way to handle those arguments and this change > > confuses perf trace then make some tests failing. Fix it by skipping > > the new fields that have "__data_loc char[]" type. > > > > Maybe we can switch to this instead of the BPF augmentation later. > > > > Even with this patch applied, I still have a segfault with this command: > > # ./perf trace -e syscalls:sys_enter_write What about this? 
# ./perf trace -e write Thanks, Namhyung > 0.000 sshd-session/5421 syscalls:sys_enter_write(perf: Segmentation fault > #0 0x560ea815187a in dump_stack debug.c:366 > #1 0x560ea81518f0 in sighandler_dump_stack debug.c:378 > #2 0x7fb5e14d1df0 in __restore_rt libc_sigaction.c:0 > #3 0x560ea7fc0cec in syscall_arg__scnprintf_buf builtin-trace.c:1857 > #4 0x560ea7fc2692 in syscall_arg_fmt__scnprintf_val builtin-trace.c:2398 > #5 0x560ea7fc2aad in syscall__scnprintf_args builtin-trace.c:2476 > #6 0x560ea7fc3ea2 in trace__fprintf_sys_enter builtin-trace.c:2885 > #7 0x560ea7fc57a5 in trace__event_handler builtin-trace.c:3312 > #8 0x560ea7fc68dd in trace__handle_event builtin-trace.c:3649 > #9 0x560ea7fc7f7a in __trace__deliver_event builtin-trace.c:4183 > #10 0x560ea7fc80cc in trace__deliver_event builtin-trace.c:4209 > #11 0x560ea7fc92b7 in trace__run builtin-trace.c:4577 > #12 0x560ea7fcd648 in cmd_trace builtin-trace.c:5773 > #13 0x560ea7fd35e3 in run_builtin perf.c:349 > #14 0x560ea7fd387b in handle_internal_command perf.c:401 > #15 0x560ea7fd39d4 in run_argv perf.c:448 > #16 0x560ea7fd3d1d in main perf.c:555 > #17 0x7fb5e14bbca8 in __libc_start_call_main libc_start_call_main.h:74 > #18 0x7fb5e14bbd65 in __libc_start_main@@GLIBC_2.34 libc-start.c:128 > #19 0x560ea7f25f41 in _start perf[53f41] > Segmentation fault > > This doesn't crash in a kernel without the __data_loc. > > -- Steve
On Sat, 29 Nov 2025 11:07:20 -0800 Namhyung Kim <namhyung@kernel.org> wrote: > > Even with this patch applied, I still have a segfault with this command: > > > > # ./perf trace -e syscalls:sys_enter_write > > What about this? > > # ./perf trace -e write Yes that works. As long as it isn't considered a regression that the sys_enter_write crashes, then I'm fine with it; Tested-by: Steven Rostedt (Google) <rostedt@goodmis.org> -- Steve
On Sat, Nov 29, 2025 at 02:42:02PM -0500, Steven Rostedt wrote: > On Sat, 29 Nov 2025 11:07:20 -0800 > Namhyung Kim <namhyung@kernel.org> wrote: > > > > Even with this patch applied, I still have a segfault with this command: > > > > > > # ./perf trace -e syscalls:sys_enter_write > > > > What about this? > > > > # ./perf trace -e write > > Yes that works. As long as it isn't considered a regression that the > sys_enter_write crashes, then I'm fine with; > > Tested-by: Steven Rostedt (Google) <rostedt@goodmis.org> Thanks, I think it's a separate issue. Will take a look. Namhyung
On 11/27/25 05:44, Namhyung Kim wrote:
> Recent changes in the linux-next kernel will add new field for syscalls
> to have contents in the userspace like below.
>
> # cat /sys/kernel/tracing/events/syscalls/sys_enter_write/format
> name: sys_enter_write
> ID: 758
> format:
> field:unsigned short common_type; offset:0; size:2; signed:0;
> field:unsigned char common_flags; offset:2; size:1; signed:0;
> field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
> field:int common_pid; offset:4; size:4; signed:1;
>
> field:int __syscall_nr; offset:8; size:4; signed:1;
> field:unsigned int fd; offset:16; size:8; signed:0;
> field:const char * buf; offset:24; size:8; signed:0;
> field:size_t count; offset:32; size:8; signed:0;
> field:__data_loc char[] __buf_val; offset:40; size:4; signed:0;
>
> print fmt: "fd: 0x%08lx, buf: 0x%08lx (%s), count: 0x%08lx", ((unsigned long)(REC->fd)),
> ((unsigned long)(REC->buf)), __print_dynamic_array(__buf_val, 1),
> ((unsigned long)(REC->count))
>
> We have a different way to handle those arguments and this change
> confuses perf trace then make some tests failing. Fix it by skipping
> the new fields that have "__data_loc char[]" type.
>
> Maybe we can switch to this instead of the BPF augmentation later.
>
> Reported-by: Thomas Richter <tmricht@linux.ibm.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Howard Chu <howardchu95@gmail.com>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> tools/perf/builtin-trace.c | 21 +++++++++++++++++++++
> 1 file changed, 21 insertions(+)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index a743bda294bd3400..baee1f6956001d86 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -2069,6 +2069,15 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
> return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
> }
>
> +/*
> + * v6.19 kernel added new fields to read userspace memory for event tracing.
> + * But it's not used by perf and confuses the syscall parameters.
> + */
> +static bool is_internal_field(struct tep_format_field *field)
> +{
> + return !strcmp(field->type, "__data_loc char[]");
> +}
> +
> static struct tep_format_field *
> syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
> bool *use_btf)
> @@ -2077,6 +2086,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
> int len;
>
> for (; field; field = field->next, ++arg) {
> + /* assume it's the last argument */
> + if (is_internal_field(field))
> + continue;
> +
> last_field = field;
>
> if (arg->scnprintf)
> @@ -2145,6 +2158,7 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
> {
> char tp_name[128];
> const char *name;
> + struct tep_format_field *field;
> int err;
>
> if (sc->nonexistent)
> @@ -2201,6 +2215,13 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
> --sc->nr_args;
> }
>
> + field = sc->args;
> + while (field) {
> + if (is_internal_field(field))
> + --sc->nr_args;
> + field = field->next;
> + }
> +
> sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
> sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
>
With the patch it succeeds again:
❯ ./perf test -F 'perf trace BTF general tests'
Checking if vmlinux BTF exists
Testing perf trace's string augmentation
Testing perf trace's buffer augmentation
Testing perf trace's struct augmentation
110: perf trace BTF general tests : Ok
>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
--
Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
--
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
Hi Namhyung,
On Wed, Nov 26, 2025 at 11:10 PM Thomas Richter <tmricht@linux.ibm.com> wrote:
>
> On 11/27/25 05:44, Namhyung Kim wrote:
> > Recent changes in the linux-next kernel will add new field for syscalls
> > to have contents in the userspace like below.
> >
> > # cat /sys/kernel/tracing/events/syscalls/sys_enter_write/format
> > name: sys_enter_write
> > ID: 758
> > format:
> > field:unsigned short common_type; offset:0; size:2; signed:0;
> > field:unsigned char common_flags; offset:2; size:1; signed:0;
> > field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
> > field:int common_pid; offset:4; size:4; signed:1;
> >
> > field:int __syscall_nr; offset:8; size:4; signed:1;
> > field:unsigned int fd; offset:16; size:8; signed:0;
> > field:const char * buf; offset:24; size:8; signed:0;
> > field:size_t count; offset:32; size:8; signed:0;
> > field:__data_loc char[] __buf_val; offset:40; size:4; signed:0;
> >
> > print fmt: "fd: 0x%08lx, buf: 0x%08lx (%s), count: 0x%08lx", ((unsigned long)(REC->fd)),
> > ((unsigned long)(REC->buf)), __print_dynamic_array(__buf_val, 1),
> > ((unsigned long)(REC->count))
> >
> > We have a different way to handle those arguments and this change
> > confuses perf trace then make some tests failing. Fix it by skipping
> > the new fields that have "__data_loc char[]" type.
> >
> > Maybe we can switch to this instead of the BPF augmentation later.
> >
> > Reported-by: Thomas Richter <tmricht@linux.ibm.com>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > Cc: Howard Chu <howardchu95@gmail.com>
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
LGTM.
Reviewed-by: Howard Chu <howardchu95@gmail.com>
Thanks,
Howard
> > ---
> > tools/perf/builtin-trace.c | 21 +++++++++++++++++++++
> > 1 file changed, 21 insertions(+)
> >
> > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> > index a743bda294bd3400..baee1f6956001d86 100644
> > --- a/tools/perf/builtin-trace.c
> > +++ b/tools/perf/builtin-trace.c
> > @@ -2069,6 +2069,15 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
> > return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
> > }
> >
> > +/*
> > + * v6.19 kernel added new fields to read userspace memory for event tracing.
> > + * But it's not used by perf and confuses the syscall parameters.
> > + */
> > +static bool is_internal_field(struct tep_format_field *field)
> > +{
> > + return !strcmp(field->type, "__data_loc char[]");
> > +}
> > +
> > static struct tep_format_field *
> > syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
> > bool *use_btf)
> > @@ -2077,6 +2086,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
> > int len;
> >
> > for (; field; field = field->next, ++arg) {
> > + /* assume it's the last argument */
> > + if (is_internal_field(field))
> > + continue;
> > +
> > last_field = field;
> >
> > if (arg->scnprintf)
> > @@ -2145,6 +2158,7 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
> > {
> > char tp_name[128];
> > const char *name;
> > + struct tep_format_field *field;
> > int err;
> >
> > if (sc->nonexistent)
> > @@ -2201,6 +2215,13 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
> > --sc->nr_args;
> > }
> >
> > + field = sc->args;
> > + while (field) {
> > + if (is_internal_field(field))
> > + --sc->nr_args;
> > + field = field->next;
> > + }
> > +
> > sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
> > sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
> >
>
> With the patch it succeeds again:
> ❯ ./perf test -F 'perf trace BTF general tests'
> Checking if vmlinux BTF exists
> Testing perf trace's string augmentation
> Testing perf trace's buffer augmentation
> Testing perf trace's struct augmentation
> 110: perf trace BTF general tests : Ok
> >
>
> Tested-by: Thomas Richter <tmricht@linux.ibm.com>
> --
> Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
> --
> IBM Deutschland Research & Development GmbH
>
> Vorsitzender des Aufsichtsrats: Wolfgang Wendt
>
> Geschäftsführung: David Faller
>
> Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
© 2016 - 2025 Red Hat, Inc.