tools/perf/Makefile.perf | 10 +- tools/perf/arch/alpha/entry/syscalls/Kbuild | 2 - .../alpha/entry/syscalls/Makefile.syscalls | 5 - tools/perf/arch/alpha/include/syscall_table.h | 2 - tools/perf/arch/arc/entry/syscalls/Kbuild | 2 - .../arch/arc/entry/syscalls/Makefile.syscalls | 3 - tools/perf/arch/arc/include/syscall_table.h | 2 - tools/perf/arch/arm/entry/syscalls/Kbuild | 4 - .../arch/arm/entry/syscalls/Makefile.syscalls | 2 - tools/perf/arch/arm/include/syscall_table.h | 2 - tools/perf/arch/arm64/entry/syscalls/Kbuild | 3 - .../arm64/entry/syscalls/Makefile.syscalls | 6 - tools/perf/arch/arm64/include/syscall_table.h | 8 - tools/perf/arch/csky/entry/syscalls/Kbuild | 2 - .../csky/entry/syscalls/Makefile.syscalls | 3 - tools/perf/arch/csky/include/syscall_table.h | 2 - .../perf/arch/loongarch/entry/syscalls/Kbuild | 2 - .../entry/syscalls/Makefile.syscalls | 3 - .../arch/loongarch/include/syscall_table.h | 2 - tools/perf/arch/mips/entry/syscalls/Kbuild | 2 - .../mips/entry/syscalls/Makefile.syscalls | 5 - tools/perf/arch/mips/include/syscall_table.h | 2 - tools/perf/arch/parisc/entry/syscalls/Kbuild | 3 - .../parisc/entry/syscalls/Makefile.syscalls | 6 - .../perf/arch/parisc/include/syscall_table.h | 8 - tools/perf/arch/powerpc/entry/syscalls/Kbuild | 3 - .../powerpc/entry/syscalls/Makefile.syscalls | 6 - .../perf/arch/powerpc/include/syscall_table.h | 8 - tools/perf/arch/riscv/entry/syscalls/Kbuild | 2 - .../riscv/entry/syscalls/Makefile.syscalls | 4 - tools/perf/arch/riscv/include/syscall_table.h | 8 - tools/perf/arch/s390/entry/syscalls/Kbuild | 2 - .../s390/entry/syscalls/Makefile.syscalls | 5 - tools/perf/arch/s390/include/syscall_table.h | 2 - tools/perf/arch/sh/entry/syscalls/Kbuild | 2 - .../arch/sh/entry/syscalls/Makefile.syscalls | 4 - tools/perf/arch/sh/include/syscall_table.h | 2 - tools/perf/arch/sparc/entry/syscalls/Kbuild | 3 - .../sparc/entry/syscalls/Makefile.syscalls | 5 - tools/perf/arch/sparc/include/syscall_table.h | 8 - 
tools/perf/arch/x86/entry/syscalls/Kbuild | 3 - .../arch/x86/entry/syscalls/Makefile.syscalls | 6 - tools/perf/arch/x86/include/syscall_table.h | 8 - tools/perf/arch/xtensa/entry/syscalls/Kbuild | 2 - .../xtensa/entry/syscalls/Makefile.syscalls | 4 - .../perf/arch/xtensa/include/syscall_table.h | 2 - tools/perf/builtin-trace.c | 290 +++++++++++------- tools/perf/scripts/Makefile.syscalls | 61 ---- tools/perf/scripts/syscalltbl.sh | 86 ------ tools/perf/trace/beauty/syscalltbl.sh | 274 +++++++++++++++++ tools/perf/util/dso.c | 88 ++++++ tools/perf/util/dso.h | 58 ++++ tools/perf/util/symbol-elf.c | 27 -- tools/perf/util/syscalltbl.c | 148 ++++----- tools/perf/util/syscalltbl.h | 22 +- tools/perf/util/thread.c | 80 +++++ tools/perf/util/thread.h | 14 +- 57 files changed, 792 insertions(+), 536 deletions(-) delete mode 100644 tools/perf/arch/alpha/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/alpha/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/alpha/include/syscall_table.h delete mode 100644 tools/perf/arch/arc/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/arc/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/arc/include/syscall_table.h delete mode 100644 tools/perf/arch/arm/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/arm/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/arm/include/syscall_table.h delete mode 100644 tools/perf/arch/arm64/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/arm64/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/arm64/include/syscall_table.h delete mode 100644 tools/perf/arch/csky/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/csky/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/csky/include/syscall_table.h delete mode 100644 tools/perf/arch/loongarch/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/loongarch/entry/syscalls/Makefile.syscalls delete mode 100644 
tools/perf/arch/loongarch/include/syscall_table.h delete mode 100644 tools/perf/arch/mips/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/mips/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/mips/include/syscall_table.h delete mode 100644 tools/perf/arch/parisc/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/parisc/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/parisc/include/syscall_table.h delete mode 100644 tools/perf/arch/powerpc/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/powerpc/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/powerpc/include/syscall_table.h delete mode 100644 tools/perf/arch/riscv/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/riscv/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/riscv/include/syscall_table.h delete mode 100644 tools/perf/arch/s390/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/s390/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/s390/include/syscall_table.h delete mode 100644 tools/perf/arch/sh/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/sh/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/sh/include/syscall_table.h delete mode 100644 tools/perf/arch/sparc/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/sparc/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/sparc/include/syscall_table.h delete mode 100644 tools/perf/arch/x86/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/x86/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/x86/include/syscall_table.h delete mode 100644 tools/perf/arch/xtensa/entry/syscalls/Kbuild delete mode 100644 tools/perf/arch/xtensa/entry/syscalls/Makefile.syscalls delete mode 100644 tools/perf/arch/xtensa/include/syscall_table.h delete mode 100644 tools/perf/scripts/Makefile.syscalls delete mode 100755 tools/perf/scripts/syscalltbl.sh create mode 100755 
tools/perf/trace/beauty/syscalltbl.sh
This work builds on the clean up of system call tables and removal of
libaudit by Charlie Jenkins <charlie@rivosinc.com>.
The system call table in perf trace is used to map system call numbers
to names and vice versa. Prior to these changes, a single table
matching the perf binary's build was present. The table would be
incorrect if tracing, say, a 32-bit binary from a 64-bit version of
perf: the names and numbers wouldn't match.
Change the build so that a single system call file is built and the
potentially multiple tables are identifiable from the ELF machine type
of the process being examined. To determine the ELF machine type, the
executable's maps are searched and the associated DSOs' ELF headers are
read. When this fails and when live, /proc/pid/exe's ELF header is
read. Fall back to using perf's binary type when unknown.
Remove some runtime types used by the system call tables and make
equivalents generated at build time.
v5: Add byte swap to dso__e_machine and fix comment as suggested by
Namhyung.
v4: Add reading the e_machine from the thread's maps dsos, only read
from /proc/pid/exe on failure and when live as requested by
Namhyung. Add patches to add dso comments and remove dso_data
variables that are unused without libunwind.
v3: Add Charlie's reviewed-by tags. Incorporate feedback from Arnd
Bergmann <arnd@arndb.de> on additional optional column and MIPS
system call numbering. Rebase past Namhyung's global system call
statistics and add comments that they don't yet support an
e_machine other than EM_HOST.
v2: Change the 1 element cache for the last table as suggested by
Howard Chu, add Howard's reviewed-by tags.
Add a comment and apology to Charlie for not doing better in
guiding:
https://lore.kernel.org/all/20250114-perf_syscall_arch_runtime-v1-1-5b304e408e11@rivosinc.com/
After discussion on v1, he agreed this patch series would be
the better direction.
Ian Rogers (11):
perf dso: Move libunwind dso_data variables into ifdef
perf dso: kernel-doc for enum dso_binary_type
perf syscalltbl: Remove syscall_table.h
perf trace: Reorganize syscalls
perf syscalltbl: Remove struct syscalltbl
perf dso: Add support for reading the e_machine type for a dso
perf thread: Add support for reading the e_machine type for a thread
perf trace beauty: Add syscalltbl.sh generating all system call tables
perf syscalltbl: Use lookup table containing multiple architectures
perf build: Remove Makefile.syscalls
perf syscalltbl: Mask off ABI type for MIPS system calls
tools/perf/Makefile.perf | 10 +-
tools/perf/arch/alpha/entry/syscalls/Kbuild | 2 -
.../alpha/entry/syscalls/Makefile.syscalls | 5 -
tools/perf/arch/alpha/include/syscall_table.h | 2 -
tools/perf/arch/arc/entry/syscalls/Kbuild | 2 -
.../arch/arc/entry/syscalls/Makefile.syscalls | 3 -
tools/perf/arch/arc/include/syscall_table.h | 2 -
tools/perf/arch/arm/entry/syscalls/Kbuild | 4 -
.../arch/arm/entry/syscalls/Makefile.syscalls | 2 -
tools/perf/arch/arm/include/syscall_table.h | 2 -
tools/perf/arch/arm64/entry/syscalls/Kbuild | 3 -
.../arm64/entry/syscalls/Makefile.syscalls | 6 -
tools/perf/arch/arm64/include/syscall_table.h | 8 -
tools/perf/arch/csky/entry/syscalls/Kbuild | 2 -
.../csky/entry/syscalls/Makefile.syscalls | 3 -
tools/perf/arch/csky/include/syscall_table.h | 2 -
.../perf/arch/loongarch/entry/syscalls/Kbuild | 2 -
.../entry/syscalls/Makefile.syscalls | 3 -
.../arch/loongarch/include/syscall_table.h | 2 -
tools/perf/arch/mips/entry/syscalls/Kbuild | 2 -
.../mips/entry/syscalls/Makefile.syscalls | 5 -
tools/perf/arch/mips/include/syscall_table.h | 2 -
tools/perf/arch/parisc/entry/syscalls/Kbuild | 3 -
.../parisc/entry/syscalls/Makefile.syscalls | 6 -
.../perf/arch/parisc/include/syscall_table.h | 8 -
tools/perf/arch/powerpc/entry/syscalls/Kbuild | 3 -
.../powerpc/entry/syscalls/Makefile.syscalls | 6 -
.../perf/arch/powerpc/include/syscall_table.h | 8 -
tools/perf/arch/riscv/entry/syscalls/Kbuild | 2 -
.../riscv/entry/syscalls/Makefile.syscalls | 4 -
tools/perf/arch/riscv/include/syscall_table.h | 8 -
tools/perf/arch/s390/entry/syscalls/Kbuild | 2 -
.../s390/entry/syscalls/Makefile.syscalls | 5 -
tools/perf/arch/s390/include/syscall_table.h | 2 -
tools/perf/arch/sh/entry/syscalls/Kbuild | 2 -
.../arch/sh/entry/syscalls/Makefile.syscalls | 4 -
tools/perf/arch/sh/include/syscall_table.h | 2 -
tools/perf/arch/sparc/entry/syscalls/Kbuild | 3 -
.../sparc/entry/syscalls/Makefile.syscalls | 5 -
tools/perf/arch/sparc/include/syscall_table.h | 8 -
tools/perf/arch/x86/entry/syscalls/Kbuild | 3 -
.../arch/x86/entry/syscalls/Makefile.syscalls | 6 -
tools/perf/arch/x86/include/syscall_table.h | 8 -
tools/perf/arch/xtensa/entry/syscalls/Kbuild | 2 -
.../xtensa/entry/syscalls/Makefile.syscalls | 4 -
.../perf/arch/xtensa/include/syscall_table.h | 2 -
tools/perf/builtin-trace.c | 290 +++++++++++-------
tools/perf/scripts/Makefile.syscalls | 61 ----
tools/perf/scripts/syscalltbl.sh | 86 ------
tools/perf/trace/beauty/syscalltbl.sh | 274 +++++++++++++++++
tools/perf/util/dso.c | 88 ++++++
tools/perf/util/dso.h | 58 ++++
tools/perf/util/symbol-elf.c | 27 --
tools/perf/util/syscalltbl.c | 148 ++++-----
tools/perf/util/syscalltbl.h | 22 +-
tools/perf/util/thread.c | 80 +++++
tools/perf/util/thread.h | 14 +-
57 files changed, 792 insertions(+), 536 deletions(-)
delete mode 100644 tools/perf/arch/alpha/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/alpha/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/alpha/include/syscall_table.h
delete mode 100644 tools/perf/arch/arc/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/arc/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/arc/include/syscall_table.h
delete mode 100644 tools/perf/arch/arm/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/arm/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/arm/include/syscall_table.h
delete mode 100644 tools/perf/arch/arm64/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/arm64/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/arm64/include/syscall_table.h
delete mode 100644 tools/perf/arch/csky/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/csky/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/csky/include/syscall_table.h
delete mode 100644 tools/perf/arch/loongarch/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/loongarch/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/loongarch/include/syscall_table.h
delete mode 100644 tools/perf/arch/mips/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/mips/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/mips/include/syscall_table.h
delete mode 100644 tools/perf/arch/parisc/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/parisc/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/parisc/include/syscall_table.h
delete mode 100644 tools/perf/arch/powerpc/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/powerpc/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/powerpc/include/syscall_table.h
delete mode 100644 tools/perf/arch/riscv/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/riscv/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/riscv/include/syscall_table.h
delete mode 100644 tools/perf/arch/s390/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/s390/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/s390/include/syscall_table.h
delete mode 100644 tools/perf/arch/sh/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/sh/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/sh/include/syscall_table.h
delete mode 100644 tools/perf/arch/sparc/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/sparc/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/sparc/include/syscall_table.h
delete mode 100644 tools/perf/arch/x86/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/x86/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/x86/include/syscall_table.h
delete mode 100644 tools/perf/arch/xtensa/entry/syscalls/Kbuild
delete mode 100644 tools/perf/arch/xtensa/entry/syscalls/Makefile.syscalls
delete mode 100644 tools/perf/arch/xtensa/include/syscall_table.h
delete mode 100644 tools/perf/scripts/Makefile.syscalls
delete mode 100755 tools/perf/scripts/syscalltbl.sh
create mode 100755 tools/perf/trace/beauty/syscalltbl.sh
--
2.49.0.rc0.332.g42c0ae87b1-goog
On Fri, Mar 07, 2025 at 04:31:58PM -0800, Ian Rogers wrote:
> This work builds on the clean up of system call tables and removal of
> libaudit by Charlie Jenkins <charlie@rivosinc.com>.
>
> The system call table in perf trace is used to map system call numbers
> to names and vice versa. Prior to these changes, a single table
> matching the perf binary's build was present. The table would be
> incorrect if tracing say a 32-bit binary from a 64-bit version of
> perf, the names and numbers wouldn't match.
>
> Change the build so that a single system call file is built and the
> potentially multiple tables are identifiable from the ELF machine type
> of the process being examined. To determine the ELF machine type, the
> executable's maps are searched and the associated DSOs ELF headers are
> read. When this fails and when live, /proc/pid/exe's ELF header is
> read. Fallback to using the perf's binary type when unknown.
Now it works well for me!
$ sudo ./perf trace a32.out
? ( ): a32.out/1267727 ... [continued]: execve()) = 0
? ( ): a32.out/1267727 ... [continued]: brk()) = 0x57f33000
0.062 ( 0.003 ms): a32.out/1267727 access(filename: 0xf7f170cc, mode: R) = -1 ENOENT (No such file or directory)
0.070 ( 0.011 ms): a32.out/1267727 openat(dfd: CWD, filename: 0xf7f1347f, flags: RDONLY|CLOEXEC|LARGEFILE) = 3
0.070 ( 0.023 ms): a32.out/1267727 ... [continued]: close()) = 0
0.103 ( 0.009 ms): a32.out/1267727 openat(dfd: CWD, filename: 0xf7ee43e0, flags: RDONLY|CLOEXEC|LARGEFILE) = 3
0.113 ( 0.002 ms): a32.out/1267727 read(fd: 3, buf: 0xff854990, count: 512) = 512
0.113 ( 0.049 ms): a32.out/1267727 ... [continued]: close()) = 0
0.113 ( 0.060 ms): a32.out/1267727 ... [continued]: set_tid_address()) = 1267727 (a32.out)
0.175 ( 0.001 ms): a32.out/1267727 set_robust_list(head: 0xf7ee556c, len: 12) =
0.222 ( 0.005 ms): a32.out/1267727 mprotect(start: 0xf7ebc000, len: 8192, prot: READ) = 0
0.230 ( 0.004 ms): a32.out/1267727 mprotect(start: 0x565b1000, len: 4096, prot: READ) = 0
0.237 ( 0.003 ms): a32.out/1267727 mprotect(start: 0xf7f1f000, len: 8192, prot: READ) = 0
0.258 ( 0.006 ms): a32.out/1267727 munmap(addr: 0xf7ec9000, len: 108298) = 0
0.258 ( 0.027 ms): a32.out/1267727 ... [continued]: brk()) = 0x57f33000
0.258 ( 0.031 ms): a32.out/1267727 ... [continued]: brk()) = 0x57f54000
0.258 ( 0.033 ms): a32.out/1267727 ... [continued]: brk()) = 0x57f55000
0.296 ( 0.008 ms): a32.out/1267727 openat(dfd: CWD, filename: 0x565b000a) = 3
0.316 ( 0.002 ms): a32.out/1267727 read(fd: 3, buf: 0xff8544a8, count: 4096) = 211
0.319 ( 0.001 ms): a32.out/1267727 read(fd: 3, buf: 0x57f332e0, count: 4096) = 0
0.319 ( 0.005 ms): a32.out/1267727 ... [continued]: close()) = 0
0.319 ( 0.010 ms): a32.out/1267727 ... [continued]: brk()) = 0x57f54000
0.337 ( ): a32.out/1267727 exit_group() = ?
Thanks,
Namhyung
>
> Remove some runtime types used by the system call tables and make
> equivalents generated at build time.
>
> v5: Add byte swap to dso__e_machine and fix comment as suggested by
> Namhyung.
>
> v4: Add reading the e_machine from the thread's maps dsos, only read
> from /proc/pid/exe on failure and when live as requested by
> Namhyung. Add patches to add dso comments and remove unused
> dso_data variables that are unused without libunwind.
>
> v3: Add Charlie's reviewed-by tags. Incorporate feedback from Arnd
> Bergmann <arnd@arndb.de> on additional optional column and MIPS
> system call numbering. Rebase past Namhyung's global system call
> statistics and add comments that they don't yet support an
> e_machine other than EM_HOST.
>
> v2: Change the 1 element cache for the last table as suggested by
> Howard Chu, add Howard's reviewed-by tags.
> Add a comment and apology to Charlie for not doing better in
> guiding:
> https://lore.kernel.org/all/20250114-perf_syscall_arch_runtime-v1-1-5b304e408e11@rivosinc.com/
> After discussion on v1 and he agreed this patch series would be
> the better direction.
>
> Ian Rogers (11):
> perf dso: Move libunwind dso_data variables into ifdef
> perf dso: kernel-doc for enum dso_binary_type
> perf syscalltbl: Remove syscall_table.h
> perf trace: Reorganize syscalls
> perf syscalltbl: Remove struct syscalltbl
> perf dso: Add support for reading the e_machine type for a dso
> perf thread: Add support for reading the e_machine type for a thread
> perf trace beauty: Add syscalltbl.sh generating all system call tables
> perf syscalltbl: Use lookup table containing multiple architectures
> perf build: Remove Makefile.syscalls
> perf syscalltbl: Mask off ABI type for MIPS system calls
>
> tools/perf/Makefile.perf | 10 +-
> tools/perf/arch/alpha/entry/syscalls/Kbuild | 2 -
> .../alpha/entry/syscalls/Makefile.syscalls | 5 -
> tools/perf/arch/alpha/include/syscall_table.h | 2 -
> tools/perf/arch/arc/entry/syscalls/Kbuild | 2 -
> .../arch/arc/entry/syscalls/Makefile.syscalls | 3 -
> tools/perf/arch/arc/include/syscall_table.h | 2 -
> tools/perf/arch/arm/entry/syscalls/Kbuild | 4 -
> .../arch/arm/entry/syscalls/Makefile.syscalls | 2 -
> tools/perf/arch/arm/include/syscall_table.h | 2 -
> tools/perf/arch/arm64/entry/syscalls/Kbuild | 3 -
> .../arm64/entry/syscalls/Makefile.syscalls | 6 -
> tools/perf/arch/arm64/include/syscall_table.h | 8 -
> tools/perf/arch/csky/entry/syscalls/Kbuild | 2 -
> .../csky/entry/syscalls/Makefile.syscalls | 3 -
> tools/perf/arch/csky/include/syscall_table.h | 2 -
> .../perf/arch/loongarch/entry/syscalls/Kbuild | 2 -
> .../entry/syscalls/Makefile.syscalls | 3 -
> .../arch/loongarch/include/syscall_table.h | 2 -
> tools/perf/arch/mips/entry/syscalls/Kbuild | 2 -
> .../mips/entry/syscalls/Makefile.syscalls | 5 -
> tools/perf/arch/mips/include/syscall_table.h | 2 -
> tools/perf/arch/parisc/entry/syscalls/Kbuild | 3 -
> .../parisc/entry/syscalls/Makefile.syscalls | 6 -
> .../perf/arch/parisc/include/syscall_table.h | 8 -
> tools/perf/arch/powerpc/entry/syscalls/Kbuild | 3 -
> .../powerpc/entry/syscalls/Makefile.syscalls | 6 -
> .../perf/arch/powerpc/include/syscall_table.h | 8 -
> tools/perf/arch/riscv/entry/syscalls/Kbuild | 2 -
> .../riscv/entry/syscalls/Makefile.syscalls | 4 -
> tools/perf/arch/riscv/include/syscall_table.h | 8 -
> tools/perf/arch/s390/entry/syscalls/Kbuild | 2 -
> .../s390/entry/syscalls/Makefile.syscalls | 5 -
> tools/perf/arch/s390/include/syscall_table.h | 2 -
> tools/perf/arch/sh/entry/syscalls/Kbuild | 2 -
> .../arch/sh/entry/syscalls/Makefile.syscalls | 4 -
> tools/perf/arch/sh/include/syscall_table.h | 2 -
> tools/perf/arch/sparc/entry/syscalls/Kbuild | 3 -
> .../sparc/entry/syscalls/Makefile.syscalls | 5 -
> tools/perf/arch/sparc/include/syscall_table.h | 8 -
> tools/perf/arch/x86/entry/syscalls/Kbuild | 3 -
> .../arch/x86/entry/syscalls/Makefile.syscalls | 6 -
> tools/perf/arch/x86/include/syscall_table.h | 8 -
> tools/perf/arch/xtensa/entry/syscalls/Kbuild | 2 -
> .../xtensa/entry/syscalls/Makefile.syscalls | 4 -
> .../perf/arch/xtensa/include/syscall_table.h | 2 -
> tools/perf/builtin-trace.c | 290 +++++++++++-------
> tools/perf/scripts/Makefile.syscalls | 61 ----
> tools/perf/scripts/syscalltbl.sh | 86 ------
> tools/perf/trace/beauty/syscalltbl.sh | 274 +++++++++++++++++
> tools/perf/util/dso.c | 88 ++++++
> tools/perf/util/dso.h | 58 ++++
> tools/perf/util/symbol-elf.c | 27 --
> tools/perf/util/syscalltbl.c | 148 ++++-----
> tools/perf/util/syscalltbl.h | 22 +-
> tools/perf/util/thread.c | 80 +++++
> tools/perf/util/thread.h | 14 +-
> 57 files changed, 792 insertions(+), 536 deletions(-)
> delete mode 100644 tools/perf/arch/alpha/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/alpha/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/alpha/include/syscall_table.h
> delete mode 100644 tools/perf/arch/arc/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/arc/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/arc/include/syscall_table.h
> delete mode 100644 tools/perf/arch/arm/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/arm/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/arm/include/syscall_table.h
> delete mode 100644 tools/perf/arch/arm64/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/arm64/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/arm64/include/syscall_table.h
> delete mode 100644 tools/perf/arch/csky/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/csky/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/csky/include/syscall_table.h
> delete mode 100644 tools/perf/arch/loongarch/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/loongarch/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/loongarch/include/syscall_table.h
> delete mode 100644 tools/perf/arch/mips/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/mips/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/mips/include/syscall_table.h
> delete mode 100644 tools/perf/arch/parisc/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/parisc/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/parisc/include/syscall_table.h
> delete mode 100644 tools/perf/arch/powerpc/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/powerpc/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/powerpc/include/syscall_table.h
> delete mode 100644 tools/perf/arch/riscv/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/riscv/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/riscv/include/syscall_table.h
> delete mode 100644 tools/perf/arch/s390/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/s390/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/s390/include/syscall_table.h
> delete mode 100644 tools/perf/arch/sh/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/sh/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/sh/include/syscall_table.h
> delete mode 100644 tools/perf/arch/sparc/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/sparc/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/sparc/include/syscall_table.h
> delete mode 100644 tools/perf/arch/x86/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/x86/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/x86/include/syscall_table.h
> delete mode 100644 tools/perf/arch/xtensa/entry/syscalls/Kbuild
> delete mode 100644 tools/perf/arch/xtensa/entry/syscalls/Makefile.syscalls
> delete mode 100644 tools/perf/arch/xtensa/include/syscall_table.h
> delete mode 100644 tools/perf/scripts/Makefile.syscalls
> delete mode 100755 tools/perf/scripts/syscalltbl.sh
> create mode 100755 tools/perf/trace/beauty/syscalltbl.sh
>
> --
> 2.49.0.rc0.332.g42c0ae87b1-goog
>
On Thu, Mar 13, 2025 at 12:11:40AM -0700, Namhyung Kim wrote: > On Fri, Mar 07, 2025 at 04:31:58PM -0800, Ian Rogers wrote: > > This work builds on the clean up of system call tables and removal of > > libaudit by Charlie Jenkins <charlie@rivosinc.com>. > > > > The system call table in perf trace is used to map system call numbers > > to names and vice versa. Prior to these changes, a single table > > matching the perf binary's build was present. The table would be > > incorrect if tracing say a 32-bit binary from a 64-bit version of > > perf, the names and numbers wouldn't match. > > > > Change the build so that a single system call file is built and the > > potentially multiple tables are identifiable from the ELF machine type > > of the process being examined. To determine the ELF machine type, the > > executable's maps are searched and the associated DSOs ELF headers are > > read. When this fails and when live, /proc/pid/exe's ELF header is > > read. Fallback to using the perf's binary type when unknown. > > Now it works well for me! It's working for me on x86_64 as well, I'm doing some more tests, the container builds and will do 32-bit tracing on 64-bit ARM (rpi5 aarch64) and then report results here, should be later today as the default kernel for the rpi5 doesn't come with CONFIG_FTRACE_SYSCALLS=y and BTF, so building one with it. - Arnaldo
On Thu, Mar 13, 2025 at 04:49:52PM -0300, Arnaldo Carvalho de Melo wrote: > On Thu, Mar 13, 2025 at 12:11:40AM -0700, Namhyung Kim wrote: > > On Fri, Mar 07, 2025 at 04:31:58PM -0800, Ian Rogers wrote: > > > This work builds on the clean up of system call tables and removal of > > > libaudit by Charlie Jenkins <charlie@rivosinc.com>. > > > > > > The system call table in perf trace is used to map system call numbers > > > to names and vice versa. Prior to these changes, a single table > > > matching the perf binary's build was present. The table would be > > > incorrect if tracing say a 32-bit binary from a 64-bit version of > > > perf, the names and numbers wouldn't match. > > > > > > Change the build so that a single system call file is built and the > > > potentially multiple tables are identifiable from the ELF machine type > > > of the process being examined. To determine the ELF machine type, the > > > executable's maps are searched and the associated DSOs ELF headers are > > > read. When this fails and when live, /proc/pid/exe's ELF header is > > > read. Fallback to using the perf's binary type when unknown. > > > > Now it works well for me! > > Its working for me on x86_64 as well, I'm doing some more tests, the > container builds and will do 32-bit tracing on 64-bit ARM (rpi5 aarch64) > and then report results here, should be later today as the default > kernel for the rpi5 doesn't come with CONFIG_FTRACE_SYSCALLS=y and BTF, > so building one with it. Still building, but noticed this on x86_64: 105: perf trace enum augmentation tests : FAILED! 106: perf trace BTF general tests : FAILED! 107: perf trace exit race : Ok 108: perf trace record and replay : FAILED! 
The first doesn't help that much with verbose mode, haven't checked if before this series it was failing :-\ root@x1:~# perf test -vvv 105 105: perf trace enum augmentation tests: --- start --- test child forked, pid 19411 Checking if vmlinux exists Tracing syscall landlock_add_rule ---- end(-1) ---- 105: perf trace enum augmentation tests : FAILED! root@x1:~# Ditto for 106: root@x1:~# perf test -vv 106 106: perf trace BTF general tests: --- start --- test child forked, pid 19467 Checking if vmlinux BTF exists Testing perf trace's string augmentation Testing perf trace's buffer augmentation Buffer augmentation test failed ---- end(-1) ---- 106: perf trace BTF general tests : FAILED! root@x1:~# 108 works when it's the only test: root@x1:~# perf test 108 108: perf trace record and replay : Ok root@x1:~# perf test 108 108: perf trace record and replay : Ok root@x1:~# perf test 108 108: perf trace record and replay : Ok root@x1:~# I'll try to check what is happening with the first two later today. - Arnaldo
On Thu, Mar 13, 2025 at 05:20:09PM -0300, Arnaldo Carvalho de Melo wrote:
> Still building, but noticed this on x86_64:
>
> 105: perf trace enum augmentation tests : FAILED!
> 106: perf trace BTF general tests : FAILED!
> 107: perf trace exit race : Ok
> 108: perf trace record and replay : FAILED!
>
>
> The first doesn't help that much with verbose mode, haven't checked if
> before this series it was failing :-\
>
> root@x1:~# perf test -vvv 105
> 105: perf trace enum augmentation tests:
> --- start ---
> test child forked, pid 19411
> Checking if vmlinux exists
> Tracing syscall landlock_add_rule
> ---- end(-1) ----
> 105: perf trace enum augmentation tests : FAILED!
> root@x1:~#
So:
root@x1:~# perf trace -e landlock_add_rule perf test -w landlock
root@x1:~#
But:
root@x1:~# perf trace perf test -w landlock |& grep landlock_add_rule
26.120 ( 0.002 ms): perf/19791 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7ffde75e2680, flags: 45) = -1 EINVAL (Invalid argument)
26.124 ( 0.001 ms): perf/19791 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7ffde75e2690, flags: 45) = -1 EINVAL (Invalid argument)
root@x1:~#
-e is having some trouble; when no event is specified, it works.
Something in the changes made to:
static int trace__parse_events_option(const struct option *opt, const char *str,
int unset __maybe_unused)
- Arnaldo
More data:
root@x1:~# perf trace -vvv -e landlock_add_rule perf test -w landlock
Using CPUID GenuineIntel-6-BA-3
Opening: cpu/cycles/
------------------------------------------------------------
perf_event_attr:
type 0 (PERF_TYPE_HARDWARE)
config 0xa00000000 (cpu_atom/PERF_COUNT_HW_CPU_CYCLES/)
disabled 1
------------------------------------------------------------
sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 = 27
Opening: cpu/cycles/
------------------------------------------------------------
perf_event_attr:
type 0 (PERF_TYPE_HARDWARE)
config 0x400000000 (cpu_core/PERF_COUNT_HW_CPU_CYCLES/)
disabled 1
------------------------------------------------------------
sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 = 28
Opening: raw_syscalls:sys_enter
------------------------------------------------------------
perf_event_attr:
type 2 (PERF_TYPE_TRACEPOINT)
size 136
config 0x197 (raw_syscalls:sys_enter)
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|ID|CPU|PERIOD|RAW
read_format ID|LOST
disabled 1
inherit 1
mmap 1
comm 1
enable_on_exec 1
task 1
sample_id_all 1
mmap2 1
comm_exec 1
ksymbol 1
bpf_event 1
{ wakeup_events, wakeup_watermark } 1
------------------------------------------------------------
sys_perf_event_open: pid 19786 cpu 0 group_fd -1 flags 0x8 = 29
sys_perf_event_open: pid 19786 cpu 1 group_fd -1 flags 0x8 = 30
sys_perf_event_open: pid 19786 cpu 2 group_fd -1 flags 0x8 = 31
sys_perf_event_open: pid 19786 cpu 3 group_fd -1 flags 0x8 = 33
sys_perf_event_open: pid 19786 cpu 4 group_fd -1 flags 0x8 = 34
sys_perf_event_open: pid 19786 cpu 5 group_fd -1 flags 0x8 = 35
sys_perf_event_open: pid 19786 cpu 6 group_fd -1 flags 0x8 = 36
sys_perf_event_open: pid 19786 cpu 7 group_fd -1 flags 0x8 = 37
sys_perf_event_open: pid 19786 cpu 8 group_fd -1 flags 0x8 = 38
sys_perf_event_open: pid 19786 cpu 9 group_fd -1 flags 0x8 = 39
sys_perf_event_open: pid 19786 cpu 10 group_fd -1 flags 0x8 = 40
sys_perf_event_open: pid 19786 cpu 11 group_fd -1 flags 0x8 = 41
Opening: raw_syscalls:sys_exit
------------------------------------------------------------
perf_event_attr:
type 2 (PERF_TYPE_TRACEPOINT)
size 136
config 0x196 (raw_syscalls:sys_exit)
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|ID|CPU|PERIOD|RAW
read_format ID|LOST
disabled 1
inherit 1
enable_on_exec 1
sample_id_all 1
{ wakeup_events, wakeup_watermark } 1
------------------------------------------------------------
sys_perf_event_open: pid 19786 cpu 0 group_fd -1 flags 0x8 = 42
sys_perf_event_open: pid 19786 cpu 1 group_fd -1 flags 0x8 = 43
sys_perf_event_open: pid 19786 cpu 2 group_fd -1 flags 0x8 = 44
sys_perf_event_open: pid 19786 cpu 3 group_fd -1 flags 0x8 = 45
sys_perf_event_open: pid 19786 cpu 4 group_fd -1 flags 0x8 = 46
sys_perf_event_open: pid 19786 cpu 5 group_fd -1 flags 0x8 = 47
sys_perf_event_open: pid 19786 cpu 6 group_fd -1 flags 0x8 = 48
sys_perf_event_open: pid 19786 cpu 7 group_fd -1 flags 0x8 = 49
sys_perf_event_open: pid 19786 cpu 8 group_fd -1 flags 0x8 = 50
sys_perf_event_open: pid 19786 cpu 9 group_fd -1 flags 0x8 = 51
sys_perf_event_open: pid 19786 cpu 10 group_fd -1 flags 0x8 = 52
sys_perf_event_open: pid 19786 cpu 11 group_fd -1 flags 0x8 = 53
Opening: __augmented_syscalls__
------------------------------------------------------------
perf_event_attr:
type 1 (PERF_TYPE_SOFTWARE)
size 136
config 0xa (PERF_COUNT_SW_BPF_OUTPUT)
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|ID|CPU|PERIOD|RAW
read_format ID|LOST
disabled 1
enable_on_exec 1
sample_id_all 1
{ wakeup_events, wakeup_watermark } 1
------------------------------------------------------------
sys_perf_event_open: pid 19786 cpu 0 group_fd -1 flags 0x8 = 54
sys_perf_event_open: pid 19786 cpu 1 group_fd -1 flags 0x8 = 55
sys_perf_event_open: pid 19786 cpu 2 group_fd -1 flags 0x8 = 56
sys_perf_event_open: pid 19786 cpu 3 group_fd -1 flags 0x8 = 57
sys_perf_event_open: pid 19786 cpu 4 group_fd -1 flags 0x8 = 58
sys_perf_event_open: pid 19786 cpu 5 group_fd -1 flags 0x8 = 59
sys_perf_event_open: pid 19786 cpu 6 group_fd -1 flags 0x8 = 60
sys_perf_event_open: pid 19786 cpu 7 group_fd -1 flags 0x8 = 61
sys_perf_event_open: pid 19786 cpu 8 group_fd -1 flags 0x8 = 62
sys_perf_event_open: pid 19786 cpu 9 group_fd -1 flags 0x8 = 63
sys_perf_event_open: pid 19786 cpu 10 group_fd -1 flags 0x8 = 64
sys_perf_event_open: pid 19786 cpu 11 group_fd -1 flags 0x8 = 65
Problems reading syscall 156: 2 (No such file or directory)(_sysctl) information
Problems reading syscall 183: 2 (No such file or directory)(afs_syscall) information
Problems reading syscall 174: 2 (No such file or directory)(create_module) information
Problems reading syscall 214: 2 (No such file or directory)(epoll_ctl_old) information
Problems reading syscall 215: 2 (No such file or directory)(epoll_wait_old) information
Problems reading syscall 177: 2 (No such file or directory)(get_kernel_syms) information
Problems reading syscall 211: 2 (No such file or directory)(get_thread_area) information
Problems reading syscall 181: 2 (No such file or directory)(getpmsg) information
vmlinux BTF loaded
Problems reading syscall 212: 2 (No such file or directory)(lookup_dcookie) information
Problems reading syscall 180: 2 (No such file or directory)(nfsservctl) information
Problems reading syscall 182: 2 (No such file or directory)(putpmsg) information
Problems reading syscall 178: 2 (No such file or directory)(query_module) information
Problems reading syscall 185: 2 (No such file or directory)(security) information
Problems reading syscall 205: 2 (No such file or directory)(set_thread_area) information
Problems reading syscall 184: 2 (No such file or directory)(tuxcall) information
Problems reading syscall 134: 2 (No such file or directory)(uselib) information
Problems reading syscall 236: 2 (No such file or directory)(vserver) information
event qualifier tracepoint filter: id == 29098429
mmap size 528384B
libperf: mmap_per_cpu: nr cpu values 12 nr threads 1
libperf: idx 0: mmapping fd 29
<SNIP>
root@x1:~#
root@x1:~# cat /sys/kernel/tracing/events/syscalls/sys_enter_landlock_add_rule/id
1449
root@x1:~# perf trace -e landlock_add_rule perf test -w landlock
root@x1:~# strace -e landlock_add_rule perf test -w landlock
landlock_add_rule(11, LANDLOCK_RULE_PATH_BENEATH, {allowed_access=LANDLOCK_ACCESS_FS_READ_FILE, parent_fd=14}, 0x2d) = -1 EINVAL (Invalid argument)
landlock_add_rule(11, LANDLOCK_RULE_NET_PORT, {allowed_access=LANDLOCK_ACCESS_NET_CONNECT_TCP, port=19}, 0x2d) = -1 EINVAL (Invalid argument)
+++ exited with 0 +++
root@x1:~#
root@x1:~# vim /tmp/build/perf-tools-next/trace/beauty/generated/syscalltbl.c
<SNIP>
static const char *const syscall_num_to_name_EM_X86_64[] = {
[0] = "read",
[1] = "write",
[2] = "open",
<SNIP>
[442] = "mount_setattr",
[443] = "quotactl_fd",
[444] = "landlock_create_ruleset",
[445] = "landlock_add_rule",
[446] = "landlock_restrict_self",
[447] = "memfd_secret",
[448] = "process_mrelease",
[449] = "futex_waitv",
[450] = "set_mempolicy_home_node",
<SNIP>
};
static const uint16_t syscall_sorted_names_EM_X86_64[] = {
156, /* _sysctl */
43, /* accept */
288, /* accept4 */
<SNIP>
246, /* kexec_load */
250, /* keyctl */
62, /* kill */
445, /* landlock_add_rule */
444, /* landlock_create_ruleset */
446, /* landlock_restrict_self */
94, /* lchown */
192, /* lgetxattr */
<SNIP>
};
<SNIP>
#if defined(ALL_SYSCALLTBL) || defined(__i386__) || defined(__x86_64__)
{
.num_to_name = syscall_num_to_name_EM_386,
.sorted_names = syscall_sorted_names_EM_386,
.e_machine = EM_386,
.num_to_name_len = ARRAY_SIZE(syscall_num_to_name_EM_386),
.sorted_names_len = ARRAY_SIZE(syscall_sorted_names_EM_386),
},
{
.num_to_name = syscall_num_to_name_EM_X86_64,
.sorted_names = syscall_sorted_names_EM_X86_64,
.e_machine = EM_X86_64,
.num_to_name_len = ARRAY_SIZE(syscall_num_to_name_EM_X86_64),
.sorted_names_len = ARRAY_SIZE(syscall_sorted_names_EM_X86_64),
},
#endif // defined(ALL_SYSCALLTBL) || defined(__i386__) || defined(__x86_64__)
<SNIP>
On Thu, Mar 13, 2025 at 05:47:27PM -0300, Arnaldo Carvalho de Melo wrote:
> On Thu, Mar 13, 2025 at 05:20:09PM -0300, Arnaldo Carvalho de Melo wrote:
> > Still building, but noticed this on x86_64:
> >
> > 105: perf trace enum augmentation tests : FAILED!
> > 106: perf trace BTF general tests : FAILED!
> > 107: perf trace exit race : Ok
> > 108: perf trace record and replay : FAILED!
> >
> >
> > The first doesn't help that much with verbose mode, haven't checked if
> > before this series it was failing :-\
> >
> > root@x1:~# perf test -vvv 105
> > 105: perf trace enum augmentation tests:
> > --- start ---
> > test child forked, pid 19411
> > Checking if vmlinux exists
> > Tracing syscall landlock_add_rule
> > ---- end(-1) ----
> > 105: perf trace enum augmentation tests : FAILED!
> > root@x1:~#
>
> So:
>
> root@x1:~# perf trace -e landlock_add_rule perf test -w landlock
> root@x1:~#
>
> But:
>
> root@x1:~# perf trace perf test -w landlock |& grep landlock_add_rule
> 26.120 ( 0.002 ms): perf/19791 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7ffde75e2680, flags: 45) = -1 EINVAL (Invalid argument)
> 26.124 ( 0.001 ms): perf/19791 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7ffde75e2690, flags: 45) = -1 EINVAL (Invalid argument)
> root@x1:~#
>
> -e is having some trouble, when no event is specified, then it works.
>
> Something in the changes made to:
>
> static int trace__parse_events_option(const struct option *opt, const char *str,
> int unset __maybe_unused)
Thanks for the test, I think this should fix it:
Thanks,
Namhyung
---8<---
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index ace66e69c1bcde1e..67a8ec10e9e4bc8d 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -76,7 +76,7 @@ int syscalltbl__id(int e_machine, const char *name)
{
const struct syscalltbl *table = find_table(e_machine);
struct syscall_cmp_key key;
- const int *id;
+ const uint16_t *id;
if (!table)
return -1;
On Thu, Mar 13, 2025 at 10:45:49PM -0700, Namhyung Kim wrote:
> On Thu, Mar 13, 2025 at 05:47:27PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Thu, Mar 13, 2025 at 05:20:09PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Still building, but noticed this on x86_64:
> > >
> > > 105: perf trace enum augmentation tests : FAILED!
> > > 106: perf trace BTF general tests : FAILED!
> > > 107: perf trace exit race : Ok
> > > 108: perf trace record and replay : FAILED!
> > >
> > >
> > > The first doesn't help that much with verbose mode, haven't checked if
> > > before this series it was failing :-\
> > >
> > > root@x1:~# perf test -vvv 105
> > > 105: perf trace enum augmentation tests:
> > > --- start ---
> > > test child forked, pid 19411
> > > Checking if vmlinux exists
> > > Tracing syscall landlock_add_rule
> > > ---- end(-1) ----
> > > 105: perf trace enum augmentation tests : FAILED!
> > > root@x1:~#
> >
> > So:
> >
> > root@x1:~# perf trace -e landlock_add_rule perf test -w landlock
> > root@x1:~#
> >
> > But:
> >
> > root@x1:~# perf trace perf test -w landlock |& grep landlock_add_rule
> > 26.120 ( 0.002 ms): perf/19791 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7ffde75e2680, flags: 45) = -1 EINVAL (Invalid argument)
> > 26.124 ( 0.001 ms): perf/19791 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7ffde75e2690, flags: 45) = -1 EINVAL (Invalid argument)
> > root@x1:~#
> >
> > -e is having some trouble, when no event is specified, then it works.
> >
> > Something in the changes made to:
> >
> > static int trace__parse_events_option(const struct option *opt, const char *str,
> > int unset __maybe_unused)
>
> Thanks for the test, I think this should fix it:
>
Well, not really:
root@number:~# perf trace -e landlock_add_rule perf test -w landlock
perf: Segmentation fault
Obtained 10 stack frames.
perf() [0x5be761]
perf() [0x5be7f9]
/lib64/libc.so.6(+0x40fd0) [0x7fe005c4efd0]
perf() [0x491bc1]
perf() [0x497090]
perf() [0x4973ab]
perf() [0x413483]
/lib64/libc.so.6(+0x2a088) [0x7fe005c38088]
/lib64/libc.so.6(__libc_start_main+0x8b) [0x7fe005c3814b]
perf() [0x413ad5]
Segmentation fault (core dumped)
root@number:~#
Time for me to test another patch from Ian, the one symbolizing the
above backtrace...
- Arnaldo
>
>
> ---8<---
> diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
> index ace66e69c1bcde1e..67a8ec10e9e4bc8d 100644
> --- a/tools/perf/util/syscalltbl.c
> +++ b/tools/perf/util/syscalltbl.c
> @@ -76,7 +76,7 @@ int syscalltbl__id(int e_machine, const char *name)
> {
> const struct syscalltbl *table = find_table(e_machine);
> struct syscall_cmp_key key;
> - const int *id;
> + const uint16_t *id;
>
> if (!table)
> return -1;
On Fri, Mar 14, 2025 at 02:10:54PM -0300, Arnaldo Carvalho de Melo wrote:
> On Thu, Mar 13, 2025 at 10:45:49PM -0700, Namhyung Kim wrote:
> > On Thu, Mar 13, 2025 at 05:47:27PM -0300, Arnaldo Carvalho de Melo wrote:
> > > On Thu, Mar 13, 2025 at 05:20:09PM -0300, Arnaldo Carvalho de Melo wrote:
> > > > Still building, but noticed this on x86_64:
> > > >
> > > > 105: perf trace enum augmentation tests : FAILED!
> > > > 106: perf trace BTF general tests : FAILED!
> > > > 107: perf trace exit race : Ok
> > > > 108: perf trace record and replay : FAILED!
> > > >
> > > >
> > > > The first doesn't help that much with verbose mode, haven't checked if
> > > > before this series it was failing :-\
> > > >
> > > > root@x1:~# perf test -vvv 105
> > > > 105: perf trace enum augmentation tests:
> > > > --- start ---
> > > > test child forked, pid 19411
> > > > Checking if vmlinux exists
> > > > Tracing syscall landlock_add_rule
> > > > ---- end(-1) ----
> > > > 105: perf trace enum augmentation tests : FAILED!
> > > > root@x1:~#
This one is now ok:
0.004 ( 0.000 ms): perf/200342 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7ffd649bd0d0, flags: 45) = -1 EINVAL (Invalid argument)
root@number:~# perf test enum
105: perf trace enum augmentation tests : Ok
root@number:~#
now looking at:
root@number:~# perf test -vvvvvvvvv 106
106: perf trace BTF general tests:
--- start ---
test child forked, pid 200467
Checking if vmlinux BTF exists
Testing perf trace's string augmentation
String augmentation test failed
---- end(-1) ----
106: perf trace BTF general tests : FAILED!
root@number:~#
No clue from the test, reading its source code now to see where it is
failing to try and reproduce the problem.
- Arnaldo
On Mon, Mar 17, 2025 at 05:48:10PM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Mar 14, 2025 at 02:10:54PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Thu, Mar 13, 2025 at 10:45:49PM -0700, Namhyung Kim wrote:
> > > On Thu, Mar 13, 2025 at 05:47:27PM -0300, Arnaldo Carvalho de Melo wrote:
> > > > On Thu, Mar 13, 2025 at 05:20:09PM -0300, Arnaldo Carvalho de Melo wrote:
> > > > > Still building, but noticed this on x86_64:
> > > > >
> > > > > 105: perf trace enum augmentation tests : FAILED!
> > > > > 106: perf trace BTF general tests : FAILED!
> > > > > 107: perf trace exit race : Ok
> > > > > 108: perf trace record and replay : FAILED!
> > > > >
> > > > >
> > > > > The first doesn't help that much with verbose mode, haven't checked if
> > > > > before this series it was failing :-\
> > > > >
> > > > > root@x1:~# perf test -vvv 105
> > > > > 105: perf trace enum augmentation tests:
> > > > > --- start ---
> > > > > test child forked, pid 19411
> > > > > Checking if vmlinux exists
> > > > > Tracing syscall landlock_add_rule
> > > > > ---- end(-1) ----
> > > > > 105: perf trace enum augmentation tests : FAILED!
> > > > > root@x1:~#
> This one is now ok:
> 0.004 ( 0.000 ms): perf/200342 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7ffd649bd0d0, flags: 45) = -1 EINVAL (Invalid argument)
> root@number:~# perf test enum
> 105: perf trace enum augmentation tests : Ok
> root@number:~#
> now looking at:
> root@number:~# perf test -vvvvvvvvv 106
> 106: perf trace BTF general tests:
> --- start ---
> test child forked, pid 200467
> Checking if vmlinux BTF exists
> Testing perf trace's string augmentation
> String augmentation test failed
> ---- end(-1) ----
> 106: perf trace BTF general tests : FAILED!
> root@number:~#
> No clue from the test, reading its source code now to see where it is
> failing to try and reproduce the problem.
root@number:~# rm -f /tmp/1234567 ; touch /tmp/1234567 ; perf trace -e renameat* --max-events=1 -- mv /tmp/1234567 /tmp/abcdefg
? ( ): mv/200698 ... [continued]: renameat2()) = -1 EEXIST (File exists)
root@number:~#
At this point it works:
⬢ [acme@toolbox perf-tools-next]$ git log -1
commit 58f4f294b358861adaee68dfd19da1060058ec27 (HEAD)
Author: James Clark <james.clark@linaro.org>
Date: Mon Jan 6 16:42:58 2025 +0000
perf test trace_btf_general: Fix shellcheck warning
root@number:~# rm -f /tmp/1234567 ; touch /tmp/1234567 ; perf trace -e renameat* --max-events=1 -- mv /tmp/1234567 /tmp/abcdefg
0.000 ( 0.006 ms): mv/218282 renameat2(olddfd: CWD, oldname: "/tmp/1234567", newdfd: CWD, newname: "/tmp/abcdefg", flags: NOREPLACE) = -1 EEXIST (File exists)
root@number:~#
Seems like some transient problem on this test machine, didn't manage to
bisect and now everything seems to work:
Well, not always :-\
root@number:~# perf test 105 106 107 108
105: perf trace enum augmentation tests : Ok
106: perf trace BTF general tests : Ok
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# perf test 105 106 107 108
105: perf trace enum augmentation tests : Ok
106: perf trace BTF general tests : Ok
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# perf test 105 106 107 108
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : FAILED!
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# perf test 105 106 107 108
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : FAILED!
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# perf test 105 106 107 108
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : FAILED!
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# perf test 105 106 107 108
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : FAILED!
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# perf test 105 106 107 108
105: perf trace enum augmentation tests : Ok
106: perf trace BTF general tests : FAILED!
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# for test in 105 106 107 108 ; do perf test $test ; done
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : FAILED!
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# for test in 105 106 107 108 ; do perf test $test ; done
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : FAILED!
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~#
So, overall, I think this should land and we should continue trying to
figure out the cause of the above failure cases. Probably the perf trace
tests, since they do set up BPF programs, etc., should be run
serially?
Doesn't seem to be the case:
root@number:~# for test in 105 106 107 108 ; do perf test --sequential $test ; done
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : Ok
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# for test in 105 106 107 108 ; do perf test --sequential $test ; done
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : Ok
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~# perf test --sequential 105 106 107 108
105: perf trace enum augmentation tests : FAILED!
106: perf trace BTF general tests : Ok
107: perf trace exit race : Ok
108: perf trace record and replay : Ok
root@number:~#
But then if that is the case it needs some love and care to deal with
other BPF users in the system, being more graceful in the face of
errors.
- Arnaldo
On Fri, Mar 14, 2025 at 02:10:57PM -0300, Arnaldo Carvalho de Melo wrote:
> On Thu, Mar 13, 2025 at 10:45:49PM -0700, Namhyung Kim wrote:
> > On Thu, Mar 13, 2025 at 05:47:27PM -0300, Arnaldo Carvalho de Melo wrote:
> > > On Thu, Mar 13, 2025 at 05:20:09PM -0300, Arnaldo Carvalho de Melo wrote:
> > > > Still building, but noticed this on x86_64:
> > > >
> > > > 105: perf trace enum augmentation tests : FAILED!
> > > > 106: perf trace BTF general tests : FAILED!
> > > > 107: perf trace exit race : Ok
> > > > 108: perf trace record and replay : FAILED!
> > > >
> > > >
> > > > The first doesn't help that much with verbose mode, haven't checked if
> > > > before this series it was failing :-\
> > > >
> > > > root@x1:~# perf test -vvv 105
> > > > 105: perf trace enum augmentation tests:
> > > > --- start ---
> > > > test child forked, pid 19411
> > > > Checking if vmlinux exists
> > > > Tracing syscall landlock_add_rule
> > > > ---- end(-1) ----
> > > > 105: perf trace enum augmentation tests : FAILED!
> > > > root@x1:~#
> > >
> > > So:
> > >
> > > root@x1:~# perf trace -e landlock_add_rule perf test -w landlock
> > > root@x1:~#
> > >
> > > But:
> > >
> > > root@x1:~# perf trace perf test -w landlock |& grep landlock_add_rule
> > > 26.120 ( 0.002 ms): perf/19791 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7ffde75e2680, flags: 45) = -1 EINVAL (Invalid argument)
> > > 26.124 ( 0.001 ms): perf/19791 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7ffde75e2690, flags: 45) = -1 EINVAL (Invalid argument)
> > > root@x1:~#
> > >
> > > -e is having some trouble, when no event is specified, then it works.
> > >
> > > Something in the changes made to:
> > >
> > > static int trace__parse_events_option(const struct option *opt, const char *str,
> > > int unset __maybe_unused)
> >
> > Thanks for the test, I think this should fix it:
> >
>
> Well, not really:
>
> root@number:~# perf trace -e landlock_add_rule perf test -w landlock
> perf: Segmentation fault
> Obtained 10 stack frames.
> perf() [0x5be761]
> perf() [0x5be7f9]
> /lib64/libc.so.6(+0x40fd0) [0x7fe005c4efd0]
> perf() [0x491bc1]
> perf() [0x497090]
> perf() [0x4973ab]
> perf() [0x413483]
> /lib64/libc.so.6(+0x2a088) [0x7fe005c38088]
> /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fe005c3814b]
> perf() [0x413ad5]
> Segmentation fault (core dumped)
> root@number:~#
>
> Time for me to test another patch from Ian, the one symbolizing the
> above backtrace...
>
> Worked, but didn't help as much, with gdb:
>
> (gdb) run trace -e landlock_add_rule perf test -w landlock
> Starting program: /root/bin/perf trace -e landlock_add_rule perf test -w landlock
>
> This GDB supports auto-downloading debuginfo from the following URLs:
> <https://debuginfod.fedoraproject.org/>
> Enable debuginfod for this session? (y or [n]) y
> Debuginfod has been enabled.
> To make this setting permanent, add 'set debuginfod enabled on' to .gdbinit.
> Downloading 53.88 K separate debug info for system-supplied DSO at 0x7ffff7fc6000
> Downloading 458.26 K separate debug info for /lib64/libtracefs.so.1
> [Thread debugging using libthread_db enabled]
> Using host libthread_db library "/lib64/libthread_db.so.1".
> [Detaching after fork from child process 39141]
>
> Program received signal SIGSEGV, Segmentation fault.
> 0x00000000004d0e56 in trace__find_usable_bpf_prog_entry (trace=0x7fffffffa510, sc=0x10fb7b0) at builtin-trace.c:3882
> 3882 bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
> (gdb) p field
> $1 = (struct tep_format_field *) 0x64656e6769736e75
> (gdb) bt
> #0 0x00000000004d0e56 in trace__find_usable_bpf_prog_entry (trace=0x7fffffffa510, sc=0x10fb7b0) at builtin-trace.c:3882
> #1 0x00000000004cf3de in trace__init_syscalls_bpf_prog_array_maps (trace=0x7fffffffa510, e_machine=62) at builtin-trace.c:4040
> #2 0x00000000004bf626 in trace__run (trace=0x7fffffffa510, argc=4, argv=0x7fffffffde40) at builtin-trace.c:4473
> #3 0x00000000004bb7a9 in cmd_trace (argc=4, argv=0x7fffffffde40) at builtin-trace.c:5741
> #4 0x00000000004d873f in run_builtin (p=0xf83d48 <commands+648>, argc=7, argv=0x7fffffffde40) at perf.c:351
> #5 0x00000000004d7df3 in handle_internal_command (argc=7, argv=0x7fffffffde40) at perf.c:404
> #6 0x00000000004d860f in run_argv (argcp=0x7fffffffdc7c, argv=0x7fffffffdc70) at perf.c:448
> #7 0x00000000004d7a4f in main (argc=7, argv=0x7fffffffde40) at perf.c:556
> (gdb) list -10
> 3867 return NULL;
> 3868
> 3869 try_to_find_pair:
> 3870 for (int i = 0, num_idx = syscalltbl__num_idx(sc->e_machine); i < num_idx; ++i) {
> 3871 int id = syscalltbl__id_at_idx(sc->e_machine, i);
> 3872 struct syscall *pair = trace__syscall_info(trace, NULL, sc->e_machine, id);
> 3873 struct bpf_program *pair_prog;
> 3874 bool is_candidate = false;
> 3875
> 3876 if (pair == NULL || pair == sc ||
> (gdb)
Humm
(gdb) p i
$1 = 147
(gdb) p num_idx
$2 = 379
(gdb) p id
$3 = 192
(gdb) p pair
$4 = (struct syscall *) 0x10fe8f0
(gdb) p *pair
$5 = {e_machine = 62, id = 192, tp_format = 0x10f6c00, nr_args = 3, args_size = 48, bpf_prog = {sys_enter = 0x0, sys_exit = 0x0}, is_exit = false, is_open = false, nonexistent = false,
use_btf = false, args = 0x10f9480, name = 0x814406 "lgetxattr", fmt = 0x0, arg_fmt = 0x10fa0a0}
(gdb) p sc
$6 = (struct syscall *) 0x10fb7b0
(gdb) p sc->args
$7 = (struct tep_format_field *) 0x64656e6769736e75
(gdb) p *pair
$8 = {e_machine = 62, id = 192, tp_format = 0x10f6c00, nr_args = 3, args_size = 48, bpf_prog = {sys_enter = 0x0, sys_exit = 0x0}, is_exit = false, is_open = false, nonexistent = false,
use_btf = false, args = 0x10f9480, name = 0x814406 "lgetxattr", fmt = 0x0, arg_fmt = 0x10fa0a0}
(gdb)
it finds the pair, but then its sc->args has a bogus pointer... I'll see
where this isn't being initialized...
- Arnaldo
On Fri, Mar 14, 2025 at 02:26:41PM -0300, Arnaldo Carvalho de Melo wrote:
> it finds the pair, but then its sc->args has a bogus pointer... I'll see
> where this isn't being initialized...
Breakpoint 4, trace__find_usable_bpf_prog_entry (trace=0x7fffffffa510, sc=0x1046f10) at builtin-trace.c:3874
3874 bool is_candidate = false;
(gdb) n
3876 if (pair == NULL || pair == sc ||
(gdb) p pair
$7 = (struct syscall *) 0x1083c50
(gdb) p pair->name
$8 = 0x81478e "accept4"
(gdb) n
3877 pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
(gdb) p i
$9 = 1
(gdb) n
3876 if (pair == NULL || pair == sc ||
(gdb) n
3880 printf("sc=%p\n", sc); fflush(stdout);
(gdb) n
sc=0x1046f10
3881 printf("sc->name=%p\n", sc->name); fflush(stdout);
(gdb) n
sc->name=0x6c66202c786c3830
3882 printf("sc->nr_args=%d, sc->args=%p\n", sc->nr_args, sc->args); fflush(stdout);
(gdb) p sc->nr_args
$10 = 1935635045
(gdb) p sc->args
$11 = (struct tep_format_field *) 0x257830203a6e656c
(gdb) p *sc
$12 = {e_machine = 540697702, id = 807761968, tp_format = 0x657075202c786c38, nr_args = 1935635045, args_size = 1634427759, bpf_prog = {sys_enter = 0x257830203a726464,
sys_exit = 0x7075202c786c3830}, is_exit = 101, is_open = 101, nonexistent = 114, use_btf = 95, args = 0x257830203a6e656c,
name = 0x6c66202c786c3830 <error: Cannot access memory at address 0x6c66202c786c3830>, fmt = 0x257830203a736761, arg_fmt = 0x786c3830}
(gdb)
Ok, ran out of time, but if I simply avoid the second loop in:
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine)
I.e. the one that starts with:
/*
* Now lets do a second pass looking for enabled syscalls without
* an augmenter that have a signature that is a superset of another
* syscall with an augmenter so that we can auto-reuse it.
This:
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e0434f7dc67cb988..3664bb512c70cabf 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3989,6 +3989,8 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
goto out;
}
+ return 0;
+
/*
* Now lets do a second pass looking for enabled syscalls without
* an augmenter that have a signature that is a superset of another
⬢ [acme@toolbox perf-tools-next]$
Then all works, we don't reuse any BPF program, but then that is a
heuristic anyway, that is tried because landlock_add_rule has a pointer
argument:
root@number:~# perf trace -e landlock_add_rule perf test -w landlock
0.000 ( 0.003 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7fff6f2bb550, flags: 45) = -1 EINVAL (Invalid argument)
0.004 ( 0.001 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7fff6f2bb540, flags: 45) = -1 EINVAL (Invalid argument)
root@number:~# perf test enum
105: perf trace enum augmentation tests : Ok
root@number:~#
So it's some sort of synchronization on the various new tables, sorted by
name, etc. that then when iterating over the syscalls ends up using a sc
that is not initialized.
- Arnaldo
On Fri, Mar 14, 2025 at 05:48:12PM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Mar 14, 2025 at 02:26:41PM -0300, Arnaldo Carvalho de Melo wrote:
> > it finds the pair, but then its sc->args has a bogus pointer... I'll see
> > where this isn't being initialized...
>
> Breakpoint 4, trace__find_usable_bpf_prog_entry (trace=0x7fffffffa510, sc=0x1046f10) at builtin-trace.c:3874
> 3874 bool is_candidate = false;
> (gdb) n
> 3876 if (pair == NULL || pair == sc ||
> (gdb) p pair
> $7 = (struct syscall *) 0x1083c50
> (gdb) p pair->name
> $8 = 0x81478e "accept4"
> (gdb) n
> 3877 pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
> (gdb) p i
> $9 = 1
> (gdb) n
> 3876 if (pair == NULL || pair == sc ||
> (gdb) n
> 3880 printf("sc=%p\n", sc); fflush(stdout);
> (gdb) n
> sc=0x1046f10
> 3881 printf("sc->name=%p\n", sc->name); fflush(stdout);
> (gdb) n
> sc->name=0x6c66202c786c3830
> 3882 printf("sc->nr_args=%d, sc->args=%p\n", sc->nr_args, sc->args); fflush(stdout);
> (gdb) p sc->nr_args
> $10 = 1935635045
> (gdb) p sc->args
> $11 = (struct tep_format_field *) 0x257830203a6e656c
> (gdb) p *sc
> $12 = {e_machine = 540697702, id = 807761968, tp_format = 0x657075202c786c38, nr_args = 1935635045, args_size = 1634427759, bpf_prog = {sys_enter = 0x257830203a726464,
> sys_exit = 0x7075202c786c3830}, is_exit = 101, is_open = 101, nonexistent = 114, use_btf = 95, args = 0x257830203a6e656c,
> name = 0x6c66202c786c3830 <error: Cannot access memory at address 0x6c66202c786c3830>, fmt = 0x257830203a736761, arg_fmt = 0x786c3830}
> (gdb)
>
> Ok, ran out of time, but if I simply avoid the second loop in:
>
> static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine)
>
>
> I.e. the one that starts with:
>
> /*
> * Now lets do a second pass looking for enabled syscalls without
> * an augmenter that have a signature that is a superset of another
> * syscall with an augmenter so that we can auto-reuse it.
>
> This:
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index e0434f7dc67cb988..3664bb512c70cabf 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -3989,6 +3989,8 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
> goto out;
> }
>
> + return 0;
> +
> /*
> * Now lets do a second pass looking for enabled syscalls without
> * an augmenter that have a signature that is a superset of another
> ⬢ [acme@toolbox perf-tools-next]$
>
>
> Then all works, we don't reuse any BPF program, but then that is a
> heuristic anyway, that is tried because landlock_add_rule has a pointer
> argument:
>
> root@number:~# perf trace -e landlock_add_rule perf test -w landlock
> 0.000 ( 0.003 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7fff6f2bb550, flags: 45) = -1 EINVAL (Invalid argument)
> 0.004 ( 0.001 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7fff6f2bb540, flags: 45) = -1 EINVAL (Invalid argument)
> root@number:~# perf test enum
> 105: perf trace enum augmentation tests : Ok
> root@number:~#
>
> So it's some sort of synchronization on the various new tables, sorted by
> name, etc. that then when iterating over the syscalls ends up using a sc
> that is not initialized.
Right, I've realized that calling trace__syscall_info() can invalidate
the existing sc since it calls trace__find_syscall(), which reallocates
and re-sorts the syscall table. That's why it was ok when no filter was
used, since it'd allocate the whole table in the first pass. Otherwise
it looks for a pair syscall while still holding the original sc, and
calling the function invalidates that sc.
What about this (on top of my earlier fix)?
Thanks,
Namhyung
---8<---
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 49199d753b7cafbf..da0ddc713e6b35da 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2506,10 +2506,12 @@ static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, i
};
struct syscall *sc, *tmp;
- sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
- sizeof(struct syscall), syscall__cmp);
- if (sc)
- return sc;
+ if (trace->syscalls.table) {
+ sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
+ sizeof(struct syscall), syscall__cmp);
+ if (sc)
+ return sc;
+ }
tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1,
sizeof(struct syscall));
@@ -3855,6 +3857,10 @@ static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int e_machine, i
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
+ int orig_id = sc->id;
+ const char *orig_name = sc->name;
+ int e_machine = sc->e_machine;
+ struct tep_format_field *args = sc->args;
struct tep_format_field *field, *candidate_field;
/*
* We're only interested in syscalls that have a pointer:
@@ -3866,18 +3872,19 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
return NULL;
+ /* calling trace__syscall_info() may invalidate 'sc' */
try_to_find_pair:
- for (int i = 0, num_idx = syscalltbl__num_idx(sc->e_machine); i < num_idx; ++i) {
- int id = syscalltbl__id_at_idx(sc->e_machine, i);
- struct syscall *pair = trace__syscall_info(trace, NULL, sc->e_machine, id);
+ for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) {
+ int id = syscalltbl__id_at_idx(e_machine, i);
+ struct syscall *pair = trace__syscall_info(trace, NULL, e_machine, id);
struct bpf_program *pair_prog;
bool is_candidate = false;
- if (pair == NULL || pair == sc ||
+ if (pair == NULL || pair->id == orig_id ||
pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
continue;
- for (field = sc->args, candidate_field = pair->args;
+ for (field = args, candidate_field = pair->args;
field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
@@ -3944,7 +3951,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
goto next_candidate;
}
- pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
+ pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, orig_name);
return pair_prog;
next_candidate:
continue;
@@ -4041,6 +4048,11 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
if (pair_prog == NULL)
continue;
+ /*
+ * Get syscall info again as find usable entry above might
+ * modify the syscall table and shuffle it.
+ */
+ sc = trace__syscall_info(trace, NULL, e_machine, key);
sc->bpf_prog.sys_enter = pair_prog;
/*
On Sat, Mar 15, 2025 at 4:02 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Fri, Mar 14, 2025 at 05:48:12PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Fri, Mar 14, 2025 at 02:26:41PM -0300, Arnaldo Carvalho de Melo wrote:
> > > it finds the pair, but then its sc->args has a bogus pointer... I'll see
> > > where this isn't being initialized...
> >
> > Breakpoint 4, trace__find_usable_bpf_prog_entry (trace=0x7fffffffa510, sc=0x1046f10) at builtin-trace.c:3874
> > 3874 bool is_candidate = false;
> > (gdb) n
> > 3876 if (pair == NULL || pair == sc ||
> > (gdb) p pair
> > $7 = (struct syscall *) 0x1083c50
> > (gdb) p pair->name
> > $8 = 0x81478e "accept4"
> > (gdb) n
> > 3877 pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
> > (gdb) p i
> > $9 = 1
> > (gdb) n
> > 3876 if (pair == NULL || pair == sc ||
> > (gdb) n
> > 3880 printf("sc=%p\n", sc); fflush(stdout);
> > (gdb) n
> > sc=0x1046f10
> > 3881 printf("sc->name=%p\n", sc->name); fflush(stdout);
> > (gdb) n
> > sc->name=0x6c66202c786c3830
> > 3882 printf("sc->nr_args=%d, sc->args=%p\n", sc->nr_args, sc->args); fflush(stdout);
> > (gdb) p sc->nr_args
> > $10 = 1935635045
> > (gdb) p sc->args
> > $11 = (struct tep_format_field *) 0x257830203a6e656c
> > (gdb) p *sc
> > $12 = {e_machine = 540697702, id = 807761968, tp_format = 0x657075202c786c38, nr_args = 1935635045, args_size = 1634427759, bpf_prog = {sys_enter = 0x257830203a726464,
> > sys_exit = 0x7075202c786c3830}, is_exit = 101, is_open = 101, nonexistent = 114, use_btf = 95, args = 0x257830203a6e656c,
> > name = 0x6c66202c786c3830 <error: Cannot access memory at address 0x6c66202c786c3830>, fmt = 0x257830203a736761, arg_fmt = 0x786c3830}
> > (gdb)
> >
> > Ok, ran out of time, but if I simply avoid the second loop in:
> >
> > static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine)
> >
> >
> > I.e. the one that starts with:
> >
> > /*
> > * Now lets do a second pass looking for enabled syscalls without
> > * an augmenter that have a signature that is a superset of another
> > * syscall with an augmenter so that we can auto-reuse it.
> >
> > This:
> >
> > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> > index e0434f7dc67cb988..3664bb512c70cabf 100644
> > --- a/tools/perf/builtin-trace.c
> > +++ b/tools/perf/builtin-trace.c
> > @@ -3989,6 +3989,8 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
> > goto out;
> > }
> >
> > + return 0;
> > +
> > /*
> > * Now lets do a second pass looking for enabled syscalls without
> > * an augmenter that have a signature that is a superset of another
> > ⬢ [acme@toolbox perf-tools-next]$
> >
> >
> > Then all works, we don't reuse any BPF program, but then that is an
> > heuristic anyway, that is tried because landlock_add_rule has a pointer
> > argument:
> >
> > root@number:~# perf trace -e landlock_add_rule perf test -w landlock
> > 0.000 ( 0.003 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7fff6f2bb550, flags: 45) = -1 EINVAL (Invalid argument)
> > 0.004 ( 0.001 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7fff6f2bb540, flags: 45) = -1 EINVAL (Invalid argument)
> > root@number:~# perf test enum
> > 105: perf trace enum augmentation tests : Ok
> > root@number:~#
> >
> > So it's some sort of synchronization on the various new tables, sorted by
> > name, etc that then when iterating over the syscalls ends up using a sc
> > that is not initialized.
>
> Right, I've realized that calling trace__syscall_info() can invalidate
> the existing sc since it calls trace__find_syscall() which reallocates
> and re-sorts the syscall table. That's why it was ok when no filter was
> used since it'd allocate the whole table in the first pass. Otherwise
> it looks for a pair syscall while holding the original sc but calling
> the function would invalidate the sc.
>
> What about this (on top of my earlier fix)?
LGTM.
Reviewed-by: Ian Rogers <irogers@google.com>
Thanks,
Ian
> Thanks,
> Namhyung
>
>
> ---8<---
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 49199d753b7cafbf..da0ddc713e6b35da 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -2506,10 +2506,12 @@ static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, i
> };
> struct syscall *sc, *tmp;
>
> - sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
> - sizeof(struct syscall), syscall__cmp);
> - if (sc)
> - return sc;
> + if (trace->syscalls.table) {
> + sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
> + sizeof(struct syscall), syscall__cmp);
> + if (sc)
> + return sc;
> + }
>
> tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1,
> sizeof(struct syscall));
> @@ -3855,6 +3857,10 @@ static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int e_machine, i
>
> static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
> {
> + int orig_id = sc->id;
> + const char *orig_name = sc->name;
> + int e_machine = sc->e_machine;
> + struct tep_format_field *args = sc->args;
> struct tep_format_field *field, *candidate_field;
> /*
> * We're only interested in syscalls that have a pointer:
> @@ -3866,18 +3872,19 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
>
> return NULL;
>
> + /* calling trace__syscall_info() may invalidate 'sc' */
> try_to_find_pair:
> - for (int i = 0, num_idx = syscalltbl__num_idx(sc->e_machine); i < num_idx; ++i) {
> - int id = syscalltbl__id_at_idx(sc->e_machine, i);
> - struct syscall *pair = trace__syscall_info(trace, NULL, sc->e_machine, id);
> + for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) {
> + int id = syscalltbl__id_at_idx(e_machine, i);
> + struct syscall *pair = trace__syscall_info(trace, NULL, e_machine, id);
> struct bpf_program *pair_prog;
> bool is_candidate = false;
>
> - if (pair == NULL || pair == sc ||
> + if (pair == NULL || pair->id == orig_id ||
> pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
> continue;
>
> - for (field = sc->args, candidate_field = pair->args;
> + for (field = args, candidate_field = pair->args;
> field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
> bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
> candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
> @@ -3944,7 +3951,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
> goto next_candidate;
> }
>
> - pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
> + pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, orig_name);
> return pair_prog;
> next_candidate:
> continue;
> @@ -4041,6 +4048,11 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
> if (pair_prog == NULL)
> continue;
>
> + /*
> + * Get syscall info again as find usable entry above might
> + * modify the syscall table and shuffle it.
> + */
> + sc = trace__syscall_info(trace, NULL, e_machine, key);
> sc->bpf_prog.sys_enter = pair_prog;
>
> /*
>
© 2016 - 2026 Red Hat, Inc.