[PATCH v2] perf trace: Fix perf trace tracing itself, creating feedback loops

Howard Chu posted 1 patch 3 weeks, 4 days ago
tools/perf/builtin-trace.c | 2 ++
1 file changed, 2 insertions(+)
[PATCH v2] perf trace: Fix perf trace tracing itself, creating feedback loops
Posted by Howard Chu 3 weeks, 4 days ago
There exists a pids_filtered map in augmented_raw_syscalls.bpf.c that
ceases to provide functionality after the BPF skeleton migration:
commit 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")

Before the migration, pid_filtered map works, courtesy of Arnaldo
Carvalho de Melo <acme@kernel.org>:

⬢ [acme@toolbox perf-tools]$ git log --oneline -5
6f769c3458b6cf2d (HEAD) perf tests trace+probe_vfs_getname.sh: Accept quotes surrounding the filename
7777ac3dfe29f55d perf test trace+probe_vfs_getname.sh: Remove stray \ before /
33d9c5062113a4bd perf script python: Add stub for PMU symbol to the python binding
e59fea47f83e8a9a perf symbols: Fix DSO kernel load and symbol process to correctly map DSO to its long_name, type and adjust_symbols
878460e8d0ff84a0 perf build: Remove -Wno-unused-but-set-variable from the flex flags when building with clang < 13.0.0

root@x1:/home/acme/git/perf-tools# perf trace -e /tmp/augmented_raw_syscalls.o -e write* --max-events=30  &
[1] 180632
root@x1:/home/acme/git/perf-tools#      0.000 ( 0.051 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
     0.115 ( 0.010 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
     0.916 ( 0.068 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 246)                         = 246
     1.699 ( 0.047 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
     2.167 ( 0.041 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
     2.739 ( 0.042 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
     3.138 ( 0.027 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
     3.477 ( 0.027 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
     3.738 ( 0.023 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
     3.946 ( 0.024 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
     4.195 ( 0.024 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
     4.212 ( 0.026 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
     4.285 ( 0.006 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
     4.445 ( 0.018 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 260)                         = 260
     4.508 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 124)                         = 124
     4.592 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 116)                         = 116
     4.666 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 130)                         = 130
     4.715 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 95)                          = 95
     4.765 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 102)                         = 102
     4.815 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 79)                          = 79
     4.890 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 57)                          = 57
     4.937 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 89)                          = 89
     5.009 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 112)                         = 112
     5.059 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 112)                         = 112
     5.116 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 79)                          = 79
     5.152 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 33)                          = 33
     5.215 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 37)                          = 37
     5.293 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 128)                         = 128
     5.339 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 89)                          = 89
     5.384 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 100)                         = 100

[1]+  Done                    perf trace -e /tmp/augmented_raw_syscalls.o -e write* --max-events=30
root@x1:/home/acme/git/perf-tools#

No events for the 'perf trace' (pid 180632), i.e. no feedback loop.

If we leave it running:

root@x1:/home/acme/git/perf-tools# perf trace -e /tmp/augmented_raw_syscalls.o -e landlock_add_rule &
[1] 181068
root@x1:/home/acme/git/perf-tools#

And then look at what maps it sets up:

root@x1:/home/acme/git/perf-tools# bpftool map | grep pids_filtered -A3
1190: hash  name pids_filtered  flags 0x0
        key 4B  value 1B  max_entries 64  memlock 7264B
        btf_id 1613
        pids perf(181068)
root@x1:/home/acme/git/perf-tools#

And ask for dumping its contents:

We see that we are _also_ setting it to filter those:

root@x1:/home/acme/git/perf-tools# bpftool map dump id 1190
[{
        "key": 181068,
        "value": 1
    },{
        "key": 156801,
        "value": 1
    }
]

Now testing the migration commit:

perf $ git log
commit 5e6da6be3082f77be06894a1a94d52a90b4007dc (HEAD)
Author: Ian Rogers <irogers@google.com>
Date:   Thu Aug 10 11:48:51 2023 -0700

    perf trace: Migrate BPF augmentation to use a skeleton

perf $ ./perf trace -e write --max-events=10 & echo #!
[1] 1808653

perf $      0.000 ( 0.010 ms): :1808671/1808671 write(fd: 1, buf: 0x6003f5b26fc0, count: 11)                          = 11
     0.162 (         ): perf/1808653 write(fd: 2, buf: 0x7fffc2174e50, count: 11)                       ...
     0.174 (         ): perf/1808653 write(fd: 2, buf: 0x74ce21804563, count: 1)                        ...
     0.184 (         ): perf/1808653 write(fd: 2, buf: 0x57b936589052, count: 5)

The feedback loop is there.

Keep it running, look into the bpf map:

perf $ bpftool map | grep pids_filtered
10675: hash  name pids_filtered  flags 0x0

sberf $ bpftool map dump id 10675
[]

The map is empty.

Now, this commit:
commit 64917f4df048 ("perf trace: Use heuristic when deciding if a syscall tracepoint "const char *" field is really a string")

Temporarily fixed the feedback loop for perf trace -e write, that's
because before using the heuristic, write is hooked to sys_enter_openat:

perf $ git log
commit 83a0943b1870944612a8aa0049f910826ebfd4f7 (HEAD)
Author: Arnaldo Carvalho de Melo <acme@redhat.com>
Date:   Thu Aug 17 12:11:51 2023 -0300

    perf trace: Use the augmented_raw_syscall BPF skel only for tracing syscalls

perf $ ./perf trace -e write --max-events=10 -v 2>&1 | grep Reusing
Reusing "openat" BPF sys_enter augmenter for "write"

And after the heuristic fix, it's unaugmented:

perf $ git log
commit 64917f4df048a0649ea7901c2321f020e71e6f24 (HEAD)
Author: Arnaldo Carvalho de Melo <acme@redhat.com>
Date:   Thu Aug 17 15:14:21 2023 -0300

    perf trace: Use heuristic when deciding if a syscall tracepoint "const char *" field is really a string

perf $ ./perf trace -e write --max-events=10 -v 2>&1 | grep Reusing
perf $

After using the heuristic, write is hooked to syscall_unaugmented, which
returns 1.

SEC("tp/raw_syscalls/sys_enter")
int syscall_unaugmented(struct syscall_enter_args *args)
{
	return 1;
}

If the BPF program returns 1, the tracepoint filter will filter it
(since the tracepoint filter for perf is correctly set), but before the
heuristic, when it was hooked to a sys_enter_openat(), which is a BPF
program that calls bpf_perf_event_output() and writes to the buffer, it
didn't get filtered, thus creating feedback loop. So switching write to
unaugmented accidentally fixed the problem.

But some syscalls are not so lucky, for example newfstatat:
perf $ ./perf trace -e newfstatat --max-events=100 & echo #!
[1] 2166948

   457.718 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/self/ns/mnt", statbuf: 0x7fff0132a9f0) ...
   457.749 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/2166950/ns/mnt", statbuf: 0x7fff0132aa80) ...
   457.962 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/self/ns/mnt", statbuf: 0x7fff0132a9f0) ...

Currently, write is augmented by the new BTF general augmenter (which
calls bpf_perf_event_output()). The problem, which luckily got fixed,
resurfaced, and that’s how it was discovered.

v1:

Currently when tracing system-wide, perf trace will trace itself,
creating feedback loops. This patch fixes this problem by setting the
correct BPF map for filtering pids.

Before:

here perf/2807067 is the tracing process itself):

perf $ ./perf trace -e write --max-events=10
     0.000 ( 0.007 ms): tmux: server/2299109 write(fd: 4, buf: \17, count: 1)                                      = 1 (systemd)
     0.060 (         ): perf/2807067 write(fd: 2, buf:      0.000 , count: 11)                          ...
     0.072 (         ): perf/2807067 write(fd: 2, buf: (, count: 1)                                     ...
     0.085 (         ): perf/2807067 write(fd: 2, buf:  0.007 ms, count: 9)                             ...
     0.089 (         ): perf/2807067 write(fd: 2, buf: ): , count: 3)                                   ...
     0.094 (         ): perf/2807067 write(fd: 2, buf: tmux: server/, count: 13)                        ...
     0.099 (         ): perf/2807067 write(fd: 2, buf: 2299109 , count: 8)                              ...
     0.103 (         ): perf/2807067 write(fd: 2, buf: write(fd: 4, buf: \17, count: 1, count: 31)      ...
     0.108 (         ): perf/2807067 write(fd: 2, buf: )                               , count: 41)     ...
     0.113 (         ): perf/2807067 write(fd: 2, buf: 1, count: 1)                                     ...

After:

perf $ ./perf trace -e write --max-events=10
     0.000 ( 0.030 ms): sshd/2725386 write(fd: 4, buf: r\148\133\163\17\167\194\172bF\231\192\227\194\215\251kBLE\167(\10WY\22\138^\233\28\248\249, count: 36) = 36 (idle_inject/3)
     0.622 ( 0.019 ms): sshd/2725386 write(fd: 4, buf: \177"\251\159\244)F5\224\250\135Y\1865/\30\191\171\140Q\213\182\133\145\224\148\190L\210{\143D, count: 228) =
     9.510 ( 0.014 ms): dirname/2805386 write(fd: 1, buf: /root/.tmux/plugins/tmux-continu, count: 43)        = 43 (kauditd)
     9.788 ( 0.007 ms): bash/2805385 write(fd: 1, buf: /root/.tmux/plugins/tmux-continu, count: 43)        = 43 (kauditd)
    13.865 ( 0.020 ms): :2805390/2805390 write(fd: 1, buf: 1.9\10, count: 4)                                   = 4 (kworker/R-rcu_g)
    15.183 ( 0.015 ms): tr/2805391 write(fd: 1, buf: 19, count: 2)                                       = 2 (kthreadd)
    15.715 ( 0.009 ms): bash/2805388 write(fd: 1, buf: 19\10, count: 3)                                    = 3 (pool_workqueue_)
    18.755 ( 0.014 ms): tmux/2805393 write(fd: 1, buf: tmux 3.4\10, count: 9)                              = 9 (kworker/0:0H-ev)
    19.737 ( 0.044 ms): sshd/2725386 write(fd: 4, buf: \188\197;\82d.1k\197\30\165[L@\153\139\192\173\247k\179kT.m\150\223\216\31\251\255, count: 316) =
    20.173 ( 0.008 ms): bash/2805396 write(fd: 1, buf: tmux 3.4\10, count: 9)                              = 9 (kworker/0:0H-ev)

Fixes: 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")
Signed-off-by: Howard Chu <howardchu95@gmail.com>
---
 tools/perf/builtin-trace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 748b061f8678..5d83da62275c 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4326,6 +4326,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 					sizeof(__u32), BPF_ANY);
 		}
 	}
+
+	trace->filter_pids.map = trace->skel->maps.pids_filtered;
 #endif
 	err = trace__set_filter_pids(trace);
 	if (err < 0)
-- 
2.43.0

Re: [PATCH v2] perf trace: Fix perf trace tracing itself, creating feedback loops
Posted by Arnaldo Carvalho de Melo 1 week, 5 days ago
On Tue, Oct 29, 2024 at 10:24:31PM -0700, Howard Chu wrote:
> There exists a pids_filtered map in augmented_raw_syscalls.bpf.c that
> ceases to provide functionality after the BPF skeleton migration:
> commit 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")

Thanks, applied to perf-tools-next,

- Arnaldo
 
> Before the migration, pid_filtered map works, courtesy of Arnaldo
> Carvalho de Melo <acme@kernel.org>:
> 
> ⬢ [acme@toolbox perf-tools]$ git log --oneline -5
> 6f769c3458b6cf2d (HEAD) perf tests trace+probe_vfs_getname.sh: Accept quotes surrounding the filename
> 7777ac3dfe29f55d perf test trace+probe_vfs_getname.sh: Remove stray \ before /
> 33d9c5062113a4bd perf script python: Add stub for PMU symbol to the python binding
> e59fea47f83e8a9a perf symbols: Fix DSO kernel load and symbol process to correctly map DSO to its long_name, type and adjust_symbols
> 878460e8d0ff84a0 perf build: Remove -Wno-unused-but-set-variable from the flex flags when building with clang < 13.0.0
> 
> root@x1:/home/acme/git/perf-tools# perf trace -e /tmp/augmented_raw_syscalls.o -e write* --max-events=30  &
> [1] 180632
> root@x1:/home/acme/git/perf-tools#      0.000 ( 0.051 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
>      0.115 ( 0.010 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
>      0.916 ( 0.068 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 246)                         = 246
>      1.699 ( 0.047 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      2.167 ( 0.041 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      2.739 ( 0.042 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      3.138 ( 0.027 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      3.477 ( 0.027 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      3.738 ( 0.023 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      3.946 ( 0.024 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      4.195 ( 0.024 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      4.212 ( 0.026 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
>      4.285 ( 0.006 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
>      4.445 ( 0.018 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 260)                         = 260
>      4.508 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 124)                         = 124
>      4.592 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 116)                         = 116
>      4.666 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 130)                         = 130
>      4.715 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 95)                          = 95
>      4.765 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 102)                         = 102
>      4.815 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 79)                          = 79
>      4.890 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 57)                          = 57
>      4.937 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 89)                          = 89
>      5.009 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 112)                         = 112
>      5.059 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 112)                         = 112
>      5.116 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 79)                          = 79
>      5.152 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 33)                          = 33
>      5.215 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 37)                          = 37
>      5.293 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 128)                         = 128
>      5.339 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 89)                          = 89
>      5.384 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 100)                         = 100
> 
> [1]+  Done                    perf trace -e /tmp/augmented_raw_syscalls.o -e write* --max-events=30
> root@x1:/home/acme/git/perf-tools#
> 
> No events for the 'perf trace' (pid 180632), i.e. no feedback loop.
> 
> If we leave it running:
> 
> root@x1:/home/acme/git/perf-tools# perf trace -e /tmp/augmented_raw_syscalls.o -e landlock_add_rule &
> [1] 181068
> root@x1:/home/acme/git/perf-tools#
> 
> And then look at what maps it sets up:
> 
> root@x1:/home/acme/git/perf-tools# bpftool map | grep pids_filtered -A3
> 1190: hash  name pids_filtered  flags 0x0
>         key 4B  value 1B  max_entries 64  memlock 7264B
>         btf_id 1613
>         pids perf(181068)
> root@x1:/home/acme/git/perf-tools#
> 
> And ask for dumping its contents:
> 
> We see that we are _also_ setting it to filter those:
> 
> root@x1:/home/acme/git/perf-tools# bpftool map dump id 1190
> [{
>         "key": 181068,
>         "value": 1
>     },{
>         "key": 156801,
>         "value": 1
>     }
> ]
> 
> Now testing the migration commit:
> 
> perf $ git log
> commit 5e6da6be3082f77be06894a1a94d52a90b4007dc (HEAD)
> Author: Ian Rogers <irogers@google.com>
> Date:   Thu Aug 10 11:48:51 2023 -0700
> 
>     perf trace: Migrate BPF augmentation to use a skeleton
> 
> perf $ ./perf trace -e write --max-events=10 & echo #!
> [1] 1808653
> 
> perf $      0.000 ( 0.010 ms): :1808671/1808671 write(fd: 1, buf: 0x6003f5b26fc0, count: 11)                          = 11
>      0.162 (         ): perf/1808653 write(fd: 2, buf: 0x7fffc2174e50, count: 11)                       ...
>      0.174 (         ): perf/1808653 write(fd: 2, buf: 0x74ce21804563, count: 1)                        ...
>      0.184 (         ): perf/1808653 write(fd: 2, buf: 0x57b936589052, count: 5)
> 
> The feedback loop is there.
> 
> Keep it running, look into the bpf map:
> 
> perf $ bpftool map | grep pids_filtered
> 10675: hash  name pids_filtered  flags 0x0
> 
> sberf $ bpftool map dump id 10675
> []
> 
> The map is empty.
> 
> Now, this commit:
> commit 64917f4df048 ("perf trace: Use heuristic when deciding if a syscall tracepoint "const char *" field is really a string")
> 
> Temporarily fixed the feedback loop for perf trace -e write, that's
> because before using the heuristic, write is hooked to sys_enter_openat:
> 
> perf $ git log
> commit 83a0943b1870944612a8aa0049f910826ebfd4f7 (HEAD)
> Author: Arnaldo Carvalho de Melo <acme@redhat.com>
> Date:   Thu Aug 17 12:11:51 2023 -0300
> 
>     perf trace: Use the augmented_raw_syscall BPF skel only for tracing syscalls
> 
> perf $ ./perf trace -e write --max-events=10 -v 2>&1 | grep Reusing
> Reusing "openat" BPF sys_enter augmenter for "write"
> 
> And after the heuristic fix, it's unaugmented:
> 
> perf $ git log
> commit 64917f4df048a0649ea7901c2321f020e71e6f24 (HEAD)
> Author: Arnaldo Carvalho de Melo <acme@redhat.com>
> Date:   Thu Aug 17 15:14:21 2023 -0300
> 
>     perf trace: Use heuristic when deciding if a syscall tracepoint "const char *" field is really a string
> 
> perf $ ./perf trace -e write --max-events=10 -v 2>&1 | grep Reusing
> perf $
> 
> After using the heuristic, write is hooked to syscall_unaugmented, which
> returns 1.
> 
> SEC("tp/raw_syscalls/sys_enter")
> int syscall_unaugmented(struct syscall_enter_args *args)
> {
> 	return 1;
> }
> 
> If the BPF program returns 1, the tracepoint filter will filter it
> (since the tracepoint filter for perf is correctly set), but before the
> heuristic, when it was hooked to a sys_enter_openat(), which is a BPF
> program that calls bpf_perf_event_output() and writes to the buffer, it
> didn't get filtered, thus creating feedback loop. So switching write to
> unaugmented accidentally fixed the problem.
> 
> But some syscalls are not so lucky, for example newfstatat:
> perf $ ./perf trace -e newfstatat --max-events=100 & echo #!
> [1] 2166948
> 
>    457.718 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/self/ns/mnt", statbuf: 0x7fff0132a9f0) ...
>    457.749 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/2166950/ns/mnt", statbuf: 0x7fff0132aa80) ...
>    457.962 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/self/ns/mnt", statbuf: 0x7fff0132a9f0) ...
> 
> Currently, write is augmented by the new BTF general augmenter (which
> calls bpf_perf_event_output()). The problem, which luckily got fixed,
> resurfaced, and that’s how it was discovered.
> 
> v1:
> 
> Currently when tracing system-wide, perf trace will trace itself,
> creating feedback loops. This patch fixes this problem by setting the
> correct BPF map for filtering pids.
> 
> Before:
> 
> here perf/2807067 is the tracing process itself):
> 
> perf $ ./perf trace -e write --max-events=10
>      0.000 ( 0.007 ms): tmux: server/2299109 write(fd: 4, buf: \17, count: 1)                                      = 1 (systemd)
>      0.060 (         ): perf/2807067 write(fd: 2, buf:      0.000 , count: 11)                          ...
>      0.072 (         ): perf/2807067 write(fd: 2, buf: (, count: 1)                                     ...
>      0.085 (         ): perf/2807067 write(fd: 2, buf:  0.007 ms, count: 9)                             ...
>      0.089 (         ): perf/2807067 write(fd: 2, buf: ): , count: 3)                                   ...
>      0.094 (         ): perf/2807067 write(fd: 2, buf: tmux: server/, count: 13)                        ...
>      0.099 (         ): perf/2807067 write(fd: 2, buf: 2299109 , count: 8)                              ...
>      0.103 (         ): perf/2807067 write(fd: 2, buf: write(fd: 4, buf: \17, count: 1, count: 31)      ...
>      0.108 (         ): perf/2807067 write(fd: 2, buf: )                               , count: 41)     ...
>      0.113 (         ): perf/2807067 write(fd: 2, buf: 1, count: 1)                                     ...
> 
> After:
> 
> perf $ ./perf trace -e write --max-events=10
>      0.000 ( 0.030 ms): sshd/2725386 write(fd: 4, buf: r\148\133\163\17\167\194\172bF\231\192\227\194\215\251kBLE\167(\10WY\22\138^\233\28\248\249, count: 36) = 36 (idle_inject/3)
>      0.622 ( 0.019 ms): sshd/2725386 write(fd: 4, buf: \177"\251\159\244)F5\224\250\135Y\1865/\30\191\171\140Q\213\182\133\145\224\148\190L\210{\143D, count: 228) =
>      9.510 ( 0.014 ms): dirname/2805386 write(fd: 1, buf: /root/.tmux/plugins/tmux-continu, count: 43)        = 43 (kauditd)
>      9.788 ( 0.007 ms): bash/2805385 write(fd: 1, buf: /root/.tmux/plugins/tmux-continu, count: 43)        = 43 (kauditd)
>     13.865 ( 0.020 ms): :2805390/2805390 write(fd: 1, buf: 1.9\10, count: 4)                                   = 4 (kworker/R-rcu_g)
>     15.183 ( 0.015 ms): tr/2805391 write(fd: 1, buf: 19, count: 2)                                       = 2 (kthreadd)
>     15.715 ( 0.009 ms): bash/2805388 write(fd: 1, buf: 19\10, count: 3)                                    = 3 (pool_workqueue_)
>     18.755 ( 0.014 ms): tmux/2805393 write(fd: 1, buf: tmux 3.4\10, count: 9)                              = 9 (kworker/0:0H-ev)
>     19.737 ( 0.044 ms): sshd/2725386 write(fd: 4, buf: \188\197;\82d.1k\197\30\165[L@\153\139\192\173\247k\179kT.m\150\223\216\31\251\255, count: 316) =
>     20.173 ( 0.008 ms): bash/2805396 write(fd: 1, buf: tmux 3.4\10, count: 9)                              = 9 (kworker/0:0H-ev)
> 
> Fixes: 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")
> Signed-off-by: Howard Chu <howardchu95@gmail.com>
> ---
>  tools/perf/builtin-trace.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 748b061f8678..5d83da62275c 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -4326,6 +4326,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
>  					sizeof(__u32), BPF_ANY);
>  		}
>  	}
> +
> +	trace->filter_pids.map = trace->skel->maps.pids_filtered;
>  #endif
>  	err = trace__set_filter_pids(trace);
>  	if (err < 0)
> -- 
> 2.43.0
Re: [PATCH v2] perf trace: Fix perf trace tracing itself, creating feedback loops
Posted by Arnaldo Carvalho de Melo 1 week, 3 days ago
On Tue, Nov 12, 2024 at 02:07:36PM -0300, Arnaldo Carvalho de Melo wrote:
> On Tue, Oct 29, 2024 at 10:24:31PM -0700, Howard Chu wrote:
> > There exists a pids_filtered map in augmented_raw_syscalls.bpf.c that
> > ceases to provide functionality after the BPF skeleton migration:
> > commit 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")
> 
> Thanks, applied to perf-tools-next,

There is a problem with this patch, I'm investigating it now:

root@number:~# perf trace -e syscalls:sys_enter_exit_group true
perf: Segmentation fault
Obtained 12 stack frames.
perf() [0x6229a1]
perf() [0x622a8b]
/lib64/libc.so.6(+0x40d00) [0x7f704364fd00]
perf() [0x4b5f2e]
perf() [0x4ba3e6]
perf() [0x4c03bc]
perf() [0x4c0663]
perf() [0x4c07bc]
perf() [0x4c0b05]
/lib64/libc.so.6(+0x2a088) [0x7f7043639088]
/lib64/libc.so.6(__libc_start_main+0x8b) [0x7f704363914b]
perf() [0x412265]
Segmentation fault (core dumped)
root@number:~#

(gdb) run trace -e syscalls:sys_enter_exit_group true
Starting program: /root/bin/perf trace -e syscalls:sys_enter_exit_group true

This GDB supports auto-downloading debuginfo from the following URLs:
  <https://debuginfod.fedoraproject.org/>
Enable debuginfod for this session? (y or [n]) y
Debuginfod has been enabled.
To make this setting permanent, add 'set debuginfod enabled on' to .gdbinit.
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[Detaching after fork from child process 1965197]

Program received signal SIGSEGV, Segmentation fault.
trace__run (trace=0x7fffffffa5e0, argc=1, argv=0x7fffffffde90) at builtin-trace.c:4330
4330		trace->filter_pids.map = trace->skel->maps.pids_filtered;
(gdb) bt
#0  trace__run (trace=0x7fffffffa5e0, argc=1, argv=0x7fffffffde90) at builtin-trace.c:4330
#1  0x00000000004ba3e6 in cmd_trace (argc=1, argv=0x7fffffffde90) at builtin-trace.c:5490
#2  0x00000000004c03bc in run_builtin (p=0xec4068 <commands+648>, argc=4, argv=0x7fffffffde90) at perf.c:351
#3  0x00000000004c0663 in handle_internal_command (argc=4, argv=0x7fffffffde90) at perf.c:404
#4  0x00000000004c07bc in run_argv (argcp=0x7fffffffdc7c, argv=0x7fffffffdc70) at perf.c:448
#5  0x00000000004c0b05 in main (argc=4, argv=0x7fffffffde90) at perf.c:560
(gdb) p trace->skel
$1 = (struct augmented_raw_syscalls_bpf *) 0x0
(gdb)

I.e. when we specify a syscall event tracepoint, we expect to use
augmentation, but are not setting it up.

So this is seems to be exposing a previous bug, I'll check.

- Arnaldo
Re: [PATCH v2] perf trace: Fix perf trace tracing itself, creating feedback loops
Posted by Arnaldo Carvalho de Melo 1 week, 3 days ago
On Thu, Nov 14, 2024 at 02:57:02PM -0300, Arnaldo Carvalho de Melo wrote:
> On Tue, Nov 12, 2024 at 02:07:36PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Tue, Oct 29, 2024 at 10:24:31PM -0700, Howard Chu wrote:
> > > There exists a pids_filtered map in augmented_raw_syscalls.bpf.c that
> > > ceases to provide functionality after the BPF skeleton migration:
> > > commit 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")
> > 
> > Thanks, applied to perf-tools-next,
> 
> There is a problem with this patch, I'm investigating it now:
> 
> root@number:~# perf trace -e syscalls:sys_enter_exit_group true
> perf: Segmentation fault
> Obtained 12 stack frames.
> perf() [0x6229a1]
> perf() [0x622a8b]
> /lib64/libc.so.6(+0x40d00) [0x7f704364fd00]
> perf() [0x4b5f2e]
> perf() [0x4ba3e6]
> perf() [0x4c03bc]
> perf() [0x4c0663]
> perf() [0x4c07bc]
> perf() [0x4c0b05]
> /lib64/libc.so.6(+0x2a088) [0x7f7043639088]
> /lib64/libc.so.6(__libc_start_main+0x8b) [0x7f704363914b]
> perf() [0x412265]
> Segmentation fault (core dumped)
> root@number:~#
> 
> (gdb) run trace -e syscalls:sys_enter_exit_group true
> Starting program: /root/bin/perf trace -e syscalls:sys_enter_exit_group true
> 
> This GDB supports auto-downloading debuginfo from the following URLs:
>   <https://debuginfod.fedoraproject.org/>
> Enable debuginfod for this session? (y or [n]) y
> Debuginfod has been enabled.
> To make this setting permanent, add 'set debuginfod enabled on' to .gdbinit.
> [Thread debugging using libthread_db enabled]
> Using host libthread_db library "/lib64/libthread_db.so.1".
> [Detaching after fork from child process 1965197]
> 
> Program received signal SIGSEGV, Segmentation fault.
> trace__run (trace=0x7fffffffa5e0, argc=1, argv=0x7fffffffde90) at builtin-trace.c:4330
> 4330		trace->filter_pids.map = trace->skel->maps.pids_filtered;
> (gdb) bt
> #0  trace__run (trace=0x7fffffffa5e0, argc=1, argv=0x7fffffffde90) at builtin-trace.c:4330
> #1  0x00000000004ba3e6 in cmd_trace (argc=1, argv=0x7fffffffde90) at builtin-trace.c:5490
> #2  0x00000000004c03bc in run_builtin (p=0xec4068 <commands+648>, argc=4, argv=0x7fffffffde90) at perf.c:351
> #3  0x00000000004c0663 in handle_internal_command (argc=4, argv=0x7fffffffde90) at perf.c:404
> #4  0x00000000004c07bc in run_argv (argcp=0x7fffffffdc7c, argv=0x7fffffffdc70) at perf.c:448
> #5  0x00000000004c0b05 in main (argc=4, argv=0x7fffffffde90) at perf.c:560
> (gdb) p trace->skel
> $1 = (struct augmented_raw_syscalls_bpf *) 0x0
> (gdb)
> 
> I.e. when we specify a syscall event tracepoint, we expect to use
> augmentation, but are not setting it up.

We can't assume that trace->skel will be always set, as it is only
initialized if trace->trace_syscalls is set, so I added this patch on
top of yours:

⬢ [acme@toolbox perf-tools-next]$ git diff
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0f8678142fbfdbcb..14794725669d49ee 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4327,7 +4327,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
                }
        }
 
-       trace->filter_pids.map = trace->skel->maps.pids_filtered;
+       if (trace->skel)
+               trace->filter_pids.map = trace->skel->maps.pids_filtered;
 #endif
        err = trace__set_filter_pids(trace);
        if (err < 0)
Re: [PATCH v2] perf trace: Fix perf trace tracing itself, creating feedback loops
Posted by Howard Chu 1 week, 3 days ago
Hello,

On Thu, Nov 14, 2024 at 12:03 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Thu, Nov 14, 2024 at 02:57:02PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Tue, Nov 12, 2024 at 02:07:36PM -0300, Arnaldo Carvalho de Melo wrote:
> > > On Tue, Oct 29, 2024 at 10:24:31PM -0700, Howard Chu wrote:
> > > > There exists a pids_filtered map in augmented_raw_syscalls.bpf.c that
> > > > ceases to provide functionality after the BPF skeleton migration:
> > > > commit 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")
> > >
> > > Thanks, applied to perf-tools-next,
> >
> > There is a problem with this patch, I'm investigating it now:
> >
> > root@number:~# perf trace -e syscalls:sys_enter_exit_group true
> > perf: Segmentation fault
> > Obtained 12 stack frames.
> > perf() [0x6229a1]
> > perf() [0x622a8b]
> > /lib64/libc.so.6(+0x40d00) [0x7f704364fd00]
> > perf() [0x4b5f2e]
> > perf() [0x4ba3e6]
> > perf() [0x4c03bc]
> > perf() [0x4c0663]
> > perf() [0x4c07bc]
> > perf() [0x4c0b05]
> > /lib64/libc.so.6(+0x2a088) [0x7f7043639088]
> > /lib64/libc.so.6(__libc_start_main+0x8b) [0x7f704363914b]
> > perf() [0x412265]
> > Segmentation fault (core dumped)
> > root@number:~#
> >
> > (gdb) run trace -e syscalls:sys_enter_exit_group true
> > Starting program: /root/bin/perf trace -e syscalls:sys_enter_exit_group true
> >
> > This GDB supports auto-downloading debuginfo from the following URLs:
> >   <https://debuginfod.fedoraproject.org/>
> > Enable debuginfod for this session? (y or [n]) y
> > Debuginfod has been enabled.
> > To make this setting permanent, add 'set debuginfod enabled on' to .gdbinit.
> > [Thread debugging using libthread_db enabled]
> > Using host libthread_db library "/lib64/libthread_db.so.1".
> > [Detaching after fork from child process 1965197]
> >
> > Program received signal SIGSEGV, Segmentation fault.
> > trace__run (trace=0x7fffffffa5e0, argc=1, argv=0x7fffffffde90) at builtin-trace.c:4330
> > 4330          trace->filter_pids.map = trace->skel->maps.pids_filtered;
> > (gdb) bt
> > #0  trace__run (trace=0x7fffffffa5e0, argc=1, argv=0x7fffffffde90) at builtin-trace.c:4330
> > #1  0x00000000004ba3e6 in cmd_trace (argc=1, argv=0x7fffffffde90) at builtin-trace.c:5490
> > #2  0x00000000004c03bc in run_builtin (p=0xec4068 <commands+648>, argc=4, argv=0x7fffffffde90) at perf.c:351
> > #3  0x00000000004c0663 in handle_internal_command (argc=4, argv=0x7fffffffde90) at perf.c:404
> > #4  0x00000000004c07bc in run_argv (argcp=0x7fffffffdc7c, argv=0x7fffffffdc70) at perf.c:448
> > #5  0x00000000004c0b05 in main (argc=4, argv=0x7fffffffde90) at perf.c:560
> > (gdb) p trace->skel
> > $1 = (struct augmented_raw_syscalls_bpf *) 0x0
> > (gdb)
> >
> > I.e. when we specify a syscall event tracepoint, we expect to use
> > augmentation, but are not setting it up.
>
> We can't assume that trace->skel will be always set, as it is only
> initialized if trace->trace_syscalls is set, so I added this patch on
> top of yours:

Yeah, using syscall tracepoints won't load the BPF skeleton.

perf $ ./perf trace -e syscalls:sys_enter_exit_group
     0.000 dirname/3732242 syscalls:sys_enter_exit_group()
     0.268 bash/3732241 syscalls:sys_enter_exit_group()
     3.951 :3732246/3732246 syscalls:sys_enter_exit_group()
     4.756 tr/3732247 syscalls:sys_enter_exit_group()
     4.945 :3732245/3732245 syscalls:sys_enter_exit_group()
     5.235 bash/3732244 syscalls:sys_enter_exit_group()
     7.975 tmux/3732249 syscalls:sys_enter_exit_group()
     9.160 :3732252/3732252 syscalls:sys_enter_exit_group()
    10.062 tr/3732253 syscalls:sys_enter_exit_group()
    10.308 :3732251/3732251 syscalls:sys_enter_exit_group()
    10.619 bash/3732250 syscalls:sys_enter_exit_group()
    10.853 bash/3732248 syscalls:sys_enter_exit_group()
    11.143 bash/3732243 syscalls:sys_enter_exit_group()
    15.379 tmux: client/3732255 syscalls:sys_enter_exit_group()
    15.985 sh/3732254 syscalls:sys_enter_exit_group()
    20.111 tmux: client/3732257 syscalls:sys_enter_exit_group()
    20.697 sh/3732256 syscalls:sys_enter_exit_group()
    24.849 tmux: client/3732259 syscalls:sys_enter_exit_group()
    25.461 sh/3732258 syscalls:sys_enter_exit_group()
    27.087 date/3732261 syscalls:sys_enter_exit_group()
    27.333 sh/3732260 syscalls:sys_enter_exit_group()
    27.596 sh/3732240 syscalls:sys_enter_exit_group()
    27.850 tmux: server/3732239 syscalls:sys_enter_exit_group()
^Cperf $

>
> ⬢ [acme@toolbox perf-tools-next]$ git diff
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 0f8678142fbfdbcb..14794725669d49ee 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -4327,7 +4327,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
>                 }
>         }
>
> -       trace->filter_pids.map = trace->skel->maps.pids_filtered;
> +       if (trace->skel)
> +               trace->filter_pids.map = trace->skel->maps.pids_filtered;

LGTM.

>  #endif
>         err = trace__set_filter_pids(trace);
>         if (err < 0)

Tested-by: Howard Chu <howardchu95@gmail.com>

Thanks,
Howard
Re: [PATCH v2] perf trace: Fix perf trace tracing itself, creating feedback loops
Posted by Howard Chu 2 weeks, 1 day ago
Hello,

Kindly ping. :)

You can test it simply by using  ./perf trace -e newfstatat
--max-events=30 & echo #!  and see if there are pids the same as
echoed, or if there are "perf" in trace messages

     9.321 (         ): perf/3159266 newfstatat(dfd: CWD, filename:
"/proc/3159273/ns/mnt", statbuf: 0x7ffefbfce7b0) ...

Thanks,
Howard

On Tue, Oct 29, 2024 at 10:24 PM Howard Chu <howardchu95@gmail.com> wrote:
>
> There exists a pids_filtered map in augmented_raw_syscalls.bpf.c that
> ceases to provide functionality after the BPF skeleton migration:
> commit 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")
>
> Before the migration, pid_filtered map works, courtesy of Arnaldo
> Carvalho de Melo <acme@kernel.org>:
>
> ⬢ [acme@toolbox perf-tools]$ git log --oneline -5
> 6f769c3458b6cf2d (HEAD) perf tests trace+probe_vfs_getname.sh: Accept quotes surrounding the filename
> 7777ac3dfe29f55d perf test trace+probe_vfs_getname.sh: Remove stray \ before /
> 33d9c5062113a4bd perf script python: Add stub for PMU symbol to the python binding
> e59fea47f83e8a9a perf symbols: Fix DSO kernel load and symbol process to correctly map DSO to its long_name, type and adjust_symbols
> 878460e8d0ff84a0 perf build: Remove -Wno-unused-but-set-variable from the flex flags when building with clang < 13.0.0
>
> root@x1:/home/acme/git/perf-tools# perf trace -e /tmp/augmented_raw_syscalls.o -e write* --max-events=30  &
> [1] 180632
> root@x1:/home/acme/git/perf-tools#      0.000 ( 0.051 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
>      0.115 ( 0.010 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
>      0.916 ( 0.068 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 246)                         = 246
>      1.699 ( 0.047 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      2.167 ( 0.041 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      2.739 ( 0.042 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      3.138 ( 0.027 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      3.477 ( 0.027 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      3.738 ( 0.023 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      3.946 ( 0.024 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      4.195 ( 0.024 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 121)                         = 121
>      4.212 ( 0.026 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
>      4.285 ( 0.006 ms): NetworkManager/1127 write(fd: 3, buf: 0x7ffeb508ef70, count: 8)                           = 8
>      4.445 ( 0.018 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 260)                         = 260
>      4.508 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 124)                         = 124
>      4.592 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 116)                         = 116
>      4.666 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 130)                         = 130
>      4.715 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 95)                          = 95
>      4.765 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 102)                         = 102
>      4.815 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 79)                          = 79
>      4.890 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 57)                          = 57
>      4.937 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 89)                          = 89
>      5.009 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 112)                         = 112
>      5.059 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 112)                         = 112
>      5.116 ( 0.007 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 79)                          = 79
>      5.152 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 33)                          = 33
>      5.215 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 37)                          = 37
>      5.293 ( 0.010 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 128)                         = 128
>      5.339 ( 0.009 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 89)                          = 89
>      5.384 ( 0.008 ms): sudo/156867 write(fd: 8, buf: 0x55cb4cd2f650, count: 100)                         = 100
>
> [1]+  Done                    perf trace -e /tmp/augmented_raw_syscalls.o -e write* --max-events=30
> root@x1:/home/acme/git/perf-tools#
>
> No events for the 'perf trace' (pid 180632), i.e. no feedback loop.
>
> If we leave it running:
>
> root@x1:/home/acme/git/perf-tools# perf trace -e /tmp/augmented_raw_syscalls.o -e landlock_add_rule &
> [1] 181068
> root@x1:/home/acme/git/perf-tools#
>
> And then look at what maps it sets up:
>
> root@x1:/home/acme/git/perf-tools# bpftool map | grep pids_filtered -A3
> 1190: hash  name pids_filtered  flags 0x0
>         key 4B  value 1B  max_entries 64  memlock 7264B
>         btf_id 1613
>         pids perf(181068)
> root@x1:/home/acme/git/perf-tools#
>
> And ask for dumping its contents:
>
> We see that we are _also_ setting it to filter those:
>
> root@x1:/home/acme/git/perf-tools# bpftool map dump id 1190
> [{
>         "key": 181068,
>         "value": 1
>     },{
>         "key": 156801,
>         "value": 1
>     }
> ]
>
> Now testing the migration commit:
>
> perf $ git log
> commit 5e6da6be3082f77be06894a1a94d52a90b4007dc (HEAD)
> Author: Ian Rogers <irogers@google.com>
> Date:   Thu Aug 10 11:48:51 2023 -0700
>
>     perf trace: Migrate BPF augmentation to use a skeleton
>
> perf $ ./perf trace -e write --max-events=10 & echo #!
> [1] 1808653
>
> perf $      0.000 ( 0.010 ms): :1808671/1808671 write(fd: 1, buf: 0x6003f5b26fc0, count: 11)                          = 11
>      0.162 (         ): perf/1808653 write(fd: 2, buf: 0x7fffc2174e50, count: 11)                       ...
>      0.174 (         ): perf/1808653 write(fd: 2, buf: 0x74ce21804563, count: 1)                        ...
>      0.184 (         ): perf/1808653 write(fd: 2, buf: 0x57b936589052, count: 5)
>
> The feedback loop is there.
>
> Keep it running, look into the bpf map:
>
> perf $ bpftool map | grep pids_filtered
> 10675: hash  name pids_filtered  flags 0x0
>
> sberf $ bpftool map dump id 10675
> []
>
> The map is empty.
>
> Now, this commit:
> commit 64917f4df048 ("perf trace: Use heuristic when deciding if a syscall tracepoint "const char *" field is really a string")
>
> Temporarily fixed the feedback loop for perf trace -e write, that's
> because before using the heuristic, write is hooked to sys_enter_openat:
>
> perf $ git log
> commit 83a0943b1870944612a8aa0049f910826ebfd4f7 (HEAD)
> Author: Arnaldo Carvalho de Melo <acme@redhat.com>
> Date:   Thu Aug 17 12:11:51 2023 -0300
>
>     perf trace: Use the augmented_raw_syscall BPF skel only for tracing syscalls
>
> perf $ ./perf trace -e write --max-events=10 -v 2>&1 | grep Reusing
> Reusing "openat" BPF sys_enter augmenter for "write"
>
> And after the heuristic fix, it's unaugmented:
>
> perf $ git log
> commit 64917f4df048a0649ea7901c2321f020e71e6f24 (HEAD)
> Author: Arnaldo Carvalho de Melo <acme@redhat.com>
> Date:   Thu Aug 17 15:14:21 2023 -0300
>
>     perf trace: Use heuristic when deciding if a syscall tracepoint "const char *" field is really a string
>
> perf $ ./perf trace -e write --max-events=10 -v 2>&1 | grep Reusing
> perf $
>
> After using the heuristic, write is hooked to syscall_unaugmented, which
> returns 1.
>
> SEC("tp/raw_syscalls/sys_enter")
> int syscall_unaugmented(struct syscall_enter_args *args)
> {
>         return 1;
> }
>
> If the BPF program returns 1, the tracepoint filter will filter it
> (since the tracepoint filter for perf is correctly set), but before the
> heuristic, when it was hooked to a sys_enter_openat(), which is a BPF
> program that calls bpf_perf_event_output() and writes to the buffer, it
> didn't get filtered, thus creating feedback loop. So switching write to
> unaugmented accidentally fixed the problem.
>
> But some syscalls are not so lucky, for example newfstatat:
> perf $ ./perf trace -e newfstatat --max-events=100 & echo #!
> [1] 2166948
>
>    457.718 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/self/ns/mnt", statbuf: 0x7fff0132a9f0) ...
>    457.749 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/2166950/ns/mnt", statbuf: 0x7fff0132aa80) ...
>    457.962 (         ): perf/2166948 newfstatat(dfd: CWD, filename: "/proc/self/ns/mnt", statbuf: 0x7fff0132a9f0) ...
>
> Currently, write is augmented by the new BTF general augmenter (which
> calls bpf_perf_event_output()). The problem, which luckily got fixed,
> resurfaced, and that’s how it was discovered.
>
> v1:
>
> Currently when tracing system-wide, perf trace will trace itself,
> creating feedback loops. This patch fixes this problem by setting the
> correct BPF map for filtering pids.
>
> Before:
>
> here perf/2807067 is the tracing process itself):
>
> perf $ ./perf trace -e write --max-events=10
>      0.000 ( 0.007 ms): tmux: server/2299109 write(fd: 4, buf: \17, count: 1)                                      = 1 (systemd)
>      0.060 (         ): perf/2807067 write(fd: 2, buf:      0.000 , count: 11)                          ...
>      0.072 (         ): perf/2807067 write(fd: 2, buf: (, count: 1)                                     ...
>      0.085 (         ): perf/2807067 write(fd: 2, buf:  0.007 ms, count: 9)                             ...
>      0.089 (         ): perf/2807067 write(fd: 2, buf: ): , count: 3)                                   ...
>      0.094 (         ): perf/2807067 write(fd: 2, buf: tmux: server/, count: 13)                        ...
>      0.099 (         ): perf/2807067 write(fd: 2, buf: 2299109 , count: 8)                              ...
>      0.103 (         ): perf/2807067 write(fd: 2, buf: write(fd: 4, buf: \17, count: 1, count: 31)      ...
>      0.108 (         ): perf/2807067 write(fd: 2, buf: )                               , count: 41)     ...
>      0.113 (         ): perf/2807067 write(fd: 2, buf: 1, count: 1)                                     ...
>
> After:
>
> perf $ ./perf trace -e write --max-events=10
>      0.000 ( 0.030 ms): sshd/2725386 write(fd: 4, buf: r\148\133\163\17\167\194\172bF\231\192\227\194\215\251kBLE\167(\10WY\22\138^\233\28\248\249, count: 36) = 36 (idle_inject/3)
>      0.622 ( 0.019 ms): sshd/2725386 write(fd: 4, buf: \177"\251\159\244)F5\224\250\135Y\1865/\30\191\171\140Q\213\182\133\145\224\148\190L\210{\143D, count: 228) =
>      9.510 ( 0.014 ms): dirname/2805386 write(fd: 1, buf: /root/.tmux/plugins/tmux-continu, count: 43)        = 43 (kauditd)
>      9.788 ( 0.007 ms): bash/2805385 write(fd: 1, buf: /root/.tmux/plugins/tmux-continu, count: 43)        = 43 (kauditd)
>     13.865 ( 0.020 ms): :2805390/2805390 write(fd: 1, buf: 1.9\10, count: 4)                                   = 4 (kworker/R-rcu_g)
>     15.183 ( 0.015 ms): tr/2805391 write(fd: 1, buf: 19, count: 2)                                       = 2 (kthreadd)
>     15.715 ( 0.009 ms): bash/2805388 write(fd: 1, buf: 19\10, count: 3)                                    = 3 (pool_workqueue_)
>     18.755 ( 0.014 ms): tmux/2805393 write(fd: 1, buf: tmux 3.4\10, count: 9)                              = 9 (kworker/0:0H-ev)
>     19.737 ( 0.044 ms): sshd/2725386 write(fd: 4, buf: \188\197;\82d.1k\197\30\165[L@\153\139\192\173\247k\179kT.m\150\223\216\31\251\255, count: 316) =
>     20.173 ( 0.008 ms): bash/2805396 write(fd: 1, buf: tmux 3.4\10, count: 9)                              = 9 (kworker/0:0H-ev)
>
> Fixes: 5e6da6be3082 ("perf trace: Migrate BPF augmentation to use a skeleton")
> Signed-off-by: Howard Chu <howardchu95@gmail.com>
> ---
>  tools/perf/builtin-trace.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 748b061f8678..5d83da62275c 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -4326,6 +4326,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
>                                         sizeof(__u32), BPF_ANY);
>                 }
>         }
> +
> +       trace->filter_pids.map = trace->skel->maps.pids_filtered;
>  #endif
>         err = trace__set_filter_pids(trace);
>         if (err < 0)
> --
> 2.43.0
>