Previously a BPF event of augmented_raw_syscalls.c could be used to
enable augmentation of syscalls by perf trace. As BPF events are no
longer supported, switch to using a BPF skeleton which, when attached,
explicitly opens the sys_enter and sys_exit tracepoints.
The dump map is removed, as debugging wasn't supported by the
augmentation; bpf_printk can be used instead when necessary.
Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the
rename/migration to a BPF skeleton captures that this was the source.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/Makefile.perf | 1 +
tools/perf/builtin-trace.c | 180 +++++++++++-------
.../bpf_skel/augmented_raw_syscalls.bpf.c} | 27 +--
3 files changed, 131 insertions(+), 77 deletions(-)
rename tools/perf/{examples/bpf/augmented_raw_syscalls.c => util/bpf_skel/augmented_raw_syscalls.bpf.c} (96%)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 6ec5079fd697..0e1597712b95 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1042,6 +1042,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h
SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h
+SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h
$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
$(Q)$(MKDIR) -p $@
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 59862467e781..8625fca42cd8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -19,6 +19,9 @@
#ifdef HAVE_LIBBPF_SUPPORT
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#ifdef HAVE_BPF_SKEL
+#include "bpf_skel/augmented_raw_syscalls.skel.h"
+#endif
#endif
#include "util/bpf_map.h"
#include "util/rlimit.h"
@@ -127,25 +130,19 @@ struct trace {
struct syscalltbl *sctbl;
struct {
struct syscall *table;
- struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
- struct bpf_map *sys_enter,
- *sys_exit;
- } prog_array;
struct {
struct evsel *sys_enter,
- *sys_exit,
- *augmented;
+ *sys_exit,
+ *bpf_output;
} events;
- struct bpf_program *unaugmented_prog;
} syscalls;
- struct {
- struct bpf_map *map;
- } dump;
+#ifdef HAVE_BPF_SKEL
+ struct augmented_raw_syscalls_bpf *skel;
+#endif
struct record_opts opts;
struct evlist *evlist;
struct machine *host;
struct thread *current;
- struct bpf_object *bpf_obj;
struct cgroup *cgroup;
u64 base_time;
FILE *output;
@@ -415,6 +412,7 @@ static int evsel__init_syscall_tp(struct evsel *evsel)
if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
return -ENOENT;
+
return 0;
}
@@ -2845,7 +2843,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
if (thread)
trace__fprintf_comm_tid(trace, thread, trace->output);
- if (evsel == trace->syscalls.events.augmented) {
+ if (evsel == trace->syscalls.events.bpf_output) {
int id = perf_evsel__sc_tp_uint(evsel, id, sample);
struct syscall *sc = trace__syscall_info(trace, evsel, id);
@@ -3278,24 +3276,16 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
goto out;
}
-#ifdef HAVE_LIBBPF_SUPPORT
-static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
-{
- if (trace->bpf_obj == NULL)
- return NULL;
-
- return bpf_object__find_map_by_name(trace->bpf_obj, name);
-}
-
+#ifdef HAVE_BPF_SKEL
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
{
struct bpf_program *pos, *prog = NULL;
const char *sec_name;
- if (trace->bpf_obj == NULL)
+ if (trace->skel->obj == NULL)
return NULL;
- bpf_object__for_each_program(pos, trace->bpf_obj) {
+ bpf_object__for_each_program(pos, trace->skel->obj) {
sec_name = bpf_program__section_name(pos);
if (sec_name && !strcmp(sec_name, name)) {
prog = pos;
@@ -3313,12 +3303,14 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
if (prog_name == NULL) {
char default_prog_name[256];
- scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
+ scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s",
+ type, sc->name);
prog = trace__find_bpf_program_by_title(trace, default_prog_name);
if (prog != NULL)
goto out_found;
if (sc->fmt && sc->fmt->alias) {
- scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
+ scnprintf(default_prog_name, sizeof(default_prog_name),
+ "tp/syscalls/sys_%s_%s", type, sc->fmt->alias);
prog = trace__find_bpf_program_by_title(trace, default_prog_name);
if (prog != NULL)
goto out_found;
@@ -3336,7 +3328,7 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
prog_name, type, sc->name);
out_unaugmented:
- return trace->syscalls.unaugmented_prog;
+ return trace->skel->progs.syscall_unaugmented;
}
static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
@@ -3353,13 +3345,21 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
{
struct syscall *sc = trace__syscall_info(trace, NULL, id);
- return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+
+ if (sc)
+ return bpf_program__fd(sc->bpf_prog.sys_enter);
+
+ return bpf_program__fd(trace->skel->progs.syscall_unaugmented);
}
static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
{
struct syscall *sc = trace__syscall_info(trace, NULL, id);
- return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+
+ if (sc)
+ return bpf_program__fd(sc->bpf_prog.sys_exit);
+
+ return bpf_program__fd(trace->skel->progs.syscall_unaugmented);
}
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
@@ -3384,7 +3384,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
bool is_candidate = false;
if (pair == NULL || pair == sc ||
- pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
+ pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
continue;
for (field = sc->args, candidate_field = pair->args;
@@ -3437,7 +3437,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
*/
if (pair_prog == NULL) {
pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
- if (pair_prog == trace->syscalls.unaugmented_prog)
+ if (pair_prog == trace->skel->progs.syscall_unaugmented)
goto next_candidate;
}
@@ -3452,8 +3452,8 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
{
- int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
- map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
+ int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter);
+ int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit);
int err = 0, key;
for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
@@ -3515,7 +3515,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
* For now we're just reusing the sys_enter prog, and if it
* already has an augmenter, we don't need to find one.
*/
- if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
+ if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented)
continue;
/*
@@ -3538,22 +3538,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
break;
}
-
return err;
}
-
-#else // HAVE_LIBBPF_SUPPORT
-static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
- const char *name __maybe_unused)
-{
- return NULL;
-}
-
-static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
-{
- return 0;
-}
-#endif // HAVE_LIBBPF_SUPPORT
+#endif // HAVE_BPF_SKEL
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
@@ -3917,13 +3904,31 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
err = evlist__open(evlist);
if (err < 0)
goto out_error_open;
+#ifdef HAVE_BPF_SKEL
+ {
+ struct perf_cpu cpu;
+ /*
+ * Set up the __augmented_syscalls__ BPF map to hold for each
+ * CPU the bpf-output event's file descriptor.
+ */
+ perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) {
+ bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__,
+ &cpu.cpu, sizeof(int),
+ xyarray__entry(trace->syscalls.events.bpf_output->core.fd,
+ cpu.cpu, 0),
+ sizeof(__u32), BPF_ANY);
+ }
+ }
+#endif
err = trace__set_filter_pids(trace);
if (err < 0)
goto out_error_mem;
- if (trace->syscalls.prog_array.sys_enter)
+#ifdef HAVE_BPF_SKEL
+ if (trace->skel->progs.sys_enter)
trace__init_syscalls_bpf_prog_array_maps(trace);
+#endif
if (trace->ev_qualifier_ids.nr > 0) {
err = trace__set_ev_qualifier_filter(trace);
@@ -3956,9 +3961,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (err < 0)
goto out_error_apply_filters;
- if (trace->dump.map)
- bpf_map__fprintf(trace->dump.map, trace->output);
-
err = evlist__mmap(evlist, trace->opts.mmap_pages);
if (err < 0)
goto out_error_mmap;
@@ -4655,6 +4657,18 @@ static void trace__exit(struct trace *trace)
zfree(&trace->perfconfig_events);
}
+#ifdef HAVE_BPF_SKEL
+static int bpf__setup_bpf_output(struct evlist *evlist)
+{
+ int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/");
+
+ if (err)
+ pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n");
+
+ return err;
+}
+#endif
+
int cmd_trace(int argc, const char **argv)
{
const char *trace_usage[] = {
@@ -4686,7 +4700,6 @@ int cmd_trace(int argc, const char **argv)
.max_stack = UINT_MAX,
.max_events = ULONG_MAX,
};
- const char *map_dump_str = NULL;
const char *output_name = NULL;
const struct option trace_options[] = {
OPT_CALLBACK('e', "event", &trace, "event",
@@ -4720,9 +4733,6 @@ int cmd_trace(int argc, const char **argv)
OPT_CALLBACK(0, "duration", &trace, "float",
"show only events with duration > N.M ms",
trace__set_duration),
-#ifdef HAVE_LIBBPF_SUPPORT
- OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
-#endif
OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_BOOLEAN('T', "time", &trace.full_time,
@@ -4849,16 +4859,55 @@ int cmd_trace(int argc, const char **argv)
"cgroup monitoring only available in system-wide mode");
}
- err = -1;
+#ifdef HAVE_BPF_SKEL
+ trace.skel = augmented_raw_syscalls_bpf__open();
+ if (!trace.skel) {
+ pr_debug("Failed to open augmented syscalls BPF skeleton");
+ } else {
+ /*
+ * Disable attaching the BPF programs except for sys_enter and
+ * sys_exit that tail call into this as necessary.
+ */
+ bpf_program__set_autoattach(trace.skel->progs.syscall_unaugmented,
+ /*autoattach=*/false);
+ bpf_program__set_autoattach(trace.skel->progs.sys_enter_connect,
+ /*autoattach=*/false);
+ bpf_program__set_autoattach(trace.skel->progs.sys_enter_sendto,
+ /*autoattach=*/false);
+ bpf_program__set_autoattach(trace.skel->progs.sys_enter_open,
+ /*autoattach=*/false);
+ bpf_program__set_autoattach(trace.skel->progs.sys_enter_openat,
+ /*autoattach=*/false);
+ bpf_program__set_autoattach(trace.skel->progs.sys_enter_rename,
+ /*autoattach=*/false);
+ bpf_program__set_autoattach(trace.skel->progs.sys_enter_renameat,
+ /*autoattach=*/false);
+ bpf_program__set_autoattach(trace.skel->progs.sys_enter_perf_event_open,
+ /*autoattach=*/false);
+ bpf_program__set_autoattach(trace.skel->progs.sys_enter_clock_nanosleep,
+ /*autoattach=*/false);
+
+ err = augmented_raw_syscalls_bpf__load(trace.skel);
- if (map_dump_str) {
- trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
- if (trace.dump.map == NULL) {
- pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
- goto out;
+ if (err < 0) {
+ pr_debug("Failed to load augmented syscalls BPF skeleton\n");
+ } else {
+ augmented_raw_syscalls_bpf__attach(trace.skel);
+ trace__add_syscall_newtp(&trace);
}
}
+ err = bpf__setup_bpf_output(trace.evlist);
+ if (err) {
+ libbpf_strerror(err, bf, sizeof(bf));
+ pr_err("ERROR: Setup BPF output event failed: %s\n", bf);
+ goto out;
+ }
+ trace.syscalls.events.bpf_output = evlist__last(trace.evlist);
+ assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__"));
+#endif
+ err = -1;
+
if (trace.trace_pgfaults) {
trace.opts.sample_address = true;
trace.opts.sample_time = true;
@@ -4909,7 +4958,7 @@ int cmd_trace(int argc, const char **argv)
* buffers that are being copied from kernel to userspace, think 'read'
* syscall.
*/
- if (trace.syscalls.events.augmented) {
+ if (trace.syscalls.events.bpf_output) {
evlist__for_each_entry(trace.evlist, evsel) {
bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
@@ -4918,9 +4967,9 @@ int cmd_trace(int argc, const char **argv)
goto init_augmented_syscall_tp;
}
- if (trace.syscalls.events.augmented->priv == NULL &&
+ if (trace.syscalls.events.bpf_output->priv == NULL &&
strstr(evsel__name(evsel), "syscalls:sys_enter")) {
- struct evsel *augmented = trace.syscalls.events.augmented;
+ struct evsel *augmented = trace.syscalls.events.bpf_output;
if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
evsel__init_augmented_syscall_tp_args(augmented))
goto out;
@@ -5025,5 +5074,8 @@ int cmd_trace(int argc, const char **argv)
fclose(trace.output);
out:
trace__exit(&trace);
+#ifdef HAVE_BPF_SKEL
+ augmented_raw_syscalls_bpf__destroy(trace.skel);
+#endif
return err;
}
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
similarity index 96%
rename from tools/perf/examples/bpf/augmented_raw_syscalls.c
rename to tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 9a03189d33d3..70478b9460ee 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -18,6 +18,8 @@
#include <bpf/bpf_helpers.h>
#include <linux/limits.h>
+#define MAX_CPUS 4096
+
// FIXME: These should come from system headers
typedef char bool;
typedef int pid_t;
@@ -34,7 +36,7 @@ struct __augmented_syscalls__ {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__type(key, int);
__type(value, __u32);
- __uint(max_entries, __NR_CPUS__);
+ __uint(max_entries, MAX_CPUS);
} __augmented_syscalls__ SEC(".maps");
/*
@@ -170,7 +172,7 @@ unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const
return augmented_len;
}
-SEC("!raw_syscalls:unaugmented")
+SEC("tp/raw_syscalls/sys_enter")
int syscall_unaugmented(struct syscall_enter_args *args)
{
return 1;
@@ -182,7 +184,7 @@ int syscall_unaugmented(struct syscall_enter_args *args)
* on from there, reading the first syscall arg as a string, i.e. open's
* filename.
*/
-SEC("!syscalls:sys_enter_connect")
+SEC("tp/syscalls/sys_enter_connect")
int sys_enter_connect(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -201,7 +203,7 @@ int sys_enter_connect(struct syscall_enter_args *args)
return augmented__output(args, augmented_args, len + socklen);
}
-SEC("!syscalls:sys_enter_sendto")
+SEC("tp/syscalls/sys_enter_sendto")
int sys_enter_sendto(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -220,7 +222,7 @@ int sys_enter_sendto(struct syscall_enter_args *args)
return augmented__output(args, augmented_args, len + socklen);
}
-SEC("!syscalls:sys_enter_open")
+SEC("tp/syscalls/sys_enter_open")
int sys_enter_open(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -235,7 +237,7 @@ int sys_enter_open(struct syscall_enter_args *args)
return augmented__output(args, augmented_args, len);
}
-SEC("!syscalls:sys_enter_openat")
+SEC("tp/syscalls/sys_enter_openat")
int sys_enter_openat(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -250,7 +252,7 @@ int sys_enter_openat(struct syscall_enter_args *args)
return augmented__output(args, augmented_args, len);
}
-SEC("!syscalls:sys_enter_rename")
+SEC("tp/syscalls/sys_enter_rename")
int sys_enter_rename(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -267,7 +269,7 @@ int sys_enter_rename(struct syscall_enter_args *args)
return augmented__output(args, augmented_args, len);
}
-SEC("!syscalls:sys_enter_renameat")
+SEC("tp/syscalls/sys_enter_renameat")
int sys_enter_renameat(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -295,7 +297,7 @@ struct perf_event_attr_size {
__u32 size;
};
-SEC("!syscalls:sys_enter_perf_event_open")
+SEC("tp/syscalls/sys_enter_perf_event_open")
int sys_enter_perf_event_open(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -327,7 +329,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
return 1; /* Failure: don't filter */
}
-SEC("!syscalls:sys_enter_clock_nanosleep")
+SEC("tp/syscalls/sys_enter_clock_nanosleep")
int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -358,7 +360,7 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
return bpf_map_lookup_elem(pids, &pid) != NULL;
}
-SEC("raw_syscalls:sys_enter")
+SEC("tp/raw_syscalls/sys_enter")
int sys_enter(struct syscall_enter_args *args)
{
struct augmented_args_payload *augmented_args;
@@ -371,7 +373,6 @@ int sys_enter(struct syscall_enter_args *args)
* We'll add to this as we add augmented syscalls right after that
* initial, non-augmented raw_syscalls:sys_enter payload.
*/
- unsigned int len = sizeof(augmented_args->args);
if (pid_filter__has(&pids_filtered, getpid()))
return 0;
@@ -393,7 +394,7 @@ int sys_enter(struct syscall_enter_args *args)
return 0;
}
-SEC("raw_syscalls:sys_exit")
+SEC("tp/raw_syscalls/sys_exit")
int sys_exit(struct syscall_exit_args *args)
{
struct syscall_exit_args exit_args;
--
2.41.0.640.ga95def55d0-goog
Em Thu, Aug 10, 2023 at 11:48:51AM -0700, Ian Rogers escreveu:
> Previously a BPF event of augmented_raw_syscalls.c could be used to
> enable augmentation of syscalls by perf trace. As BPF events are no
> longer supported, switch to using a BPF skeleton which when attached
> explicitly opens the sysenter and sysexit tracepoints.
>
> The dump map is removed as debugging wasn't supported by the
> augmentation and bpf_printk can be used when necessary.
>
> Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the
> rename/migration to a BPF skeleton captures that this was the source.
> +#ifdef HAVE_BPF_SKEL
> + trace.skel = augmented_raw_syscalls_bpf__open();
> + if (!trace.skel) {
> + pr_debug("Failed to open augmented syscalls BPF skeleton");
> + } else {
> + /*
> + * Disable attaching the BPF programs except for sys_enter and
> + * sys_exit that tail call into this as necessary.
> + */
> + bpf_program__set_autoattach(trace.skel->progs.syscall_unaugmented,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_connect,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_sendto,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_open,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_openat,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_rename,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_renameat,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_perf_event_open,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_clock_nanosleep,
> + /*autoattach=*/false);
> +
> + err = augmented_raw_syscalls_bpf__load(trace.skel);
>
So I converted the above to:
struct bpf_program *prog;
bpf_object__for_each_program(prog, trace.skel->obj) {
if (prog != trace.skel->progs.sys_enter && prog != trace.skel->progs.sys_exit)
bpf_program__set_autoattach(prog, /*autoattach=*/false);
}
So that we don't have to add new lines disabling attachment when adding
support for other pointer-receiving syscalls.
- Arnaldo
Em Thu, Aug 10, 2023 at 11:48:51AM -0700, Ian Rogers escreveu:
> Previously a BPF event of augmented_raw_syscalls.c could be used to
> enable augmentation of syscalls by perf trace. As BPF events are no
> longer supported, switch to using a BPF skeleton which when attached
> explicitly opens the sysenter and sysexit tracepoints.
>
> The dump map is removed as debugging wasn't supported by the
> augmentation and bpf_printk can be used when necessary.
>
> Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the
> rename/migration to a BPF skeleton captures that this was the source.
So, there is a problem where the augmented_raw_syscalls connect/sendto
handlers are being rejected by the verifier. The way you did it makes it
print the verifier output and then continue without augmentation;
I'm unsure if this is a good default — opinions?
[root@quaco ~]# perf trace -e open*
libbpf: prog 'sys_enter_connect': BPF program load failed: Permission denied
libbpf: prog 'sys_enter_connect': -- BEGIN PROG LOAD LOG --
reg type unsupported for arg#0 function sys_enter_connect#59
0: R1=ctx(off=0,imm=0) R10=fp0
; int sys_enter_connect(struct syscall_enter_args *args)
0: (bf) r6 = r1 ; R1=ctx(off=0,imm=0) R6_w=ctx(off=0,imm=0)
1: (b7) r1 = 0 ; R1_w=0
; int key = 0;
2: (63) *(u32 *)(r10 -4) = r1 ; R1_w=0 R10=fp0 fp-8=0000????
3: (bf) r2 = r10 ; R2_w=fp0 R10=fp0
;
4: (07) r2 += -4 ; R2_w=fp-4
; return bpf_map_lookup_elem(&augmented_args_tmp, &key);
5: (18) r1 = 0xffff8de5ae1d4600 ; R1_w=map_ptr(off=0,ks=4,vs=8272,imm=0)
7: (85) call bpf_map_lookup_elem#1 ; R0_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0)
8: (bf) r7 = r0 ; R0_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0) R7_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0)
9: (b7) r0 = 1 ; R0_w=1
; if (augmented_args == NULL)
10: (15) if r7 == 0x0 goto pc+25 ; R7_w=map_value(off=0,ks=4,vs=8272,imm=0)
; unsigned int socklen = args->args[2];
11: (79) r1 = *(u64 *)(r6 +32) ; R1_w=scalar() R6_w=ctx(off=0,imm=0)
;
12: (bf) r2 = r1 ; R1_w=scalar(id=2) R2_w=scalar(id=2)
13: (67) r2 <<= 32 ; R2_w=scalar(smax=9223372032559808512,umax=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min=0,s32_max=0,u32_max=0)
14: (77) r2 >>= 32 ; R2_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff))
15: (b7) r8 = 128 ; R8=128
; if (socklen > sizeof(augmented_args->saddr))
16: (25) if r2 > 0x80 goto pc+1 ; R2=scalar(umax=128,var_off=(0x0; 0xff))
17: (bf) r8 = r1 ; R1=scalar(id=2) R8_w=scalar(id=2)
; const void *sockaddr_arg = (const void *)args->args[1];
18: (79) r3 = *(u64 *)(r6 +24) ; R3_w=scalar() R6=ctx(off=0,imm=0)
; bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
19: (bf) r1 = r7 ; R1_w=map_value(off=0,ks=4,vs=8272,imm=0) R7=map_value(off=0,ks=4,vs=8272,imm=0)
20: (07) r1 += 64 ; R1_w=map_value(off=64,ks=4,vs=8272,imm=0)
; bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
21: (bf) r2 = r8 ; R2_w=scalar(id=2) R8_w=scalar(id=2)
22: (85) call bpf_probe_read#4
R2 min value is negative, either use unsigned or 'var &= const'
processed 22 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1
-- END PROG LOAD LOG --
libbpf: prog 'sys_enter_connect': failed to load: -13
libbpf: failed to load object 'augmented_raw_syscalls_bpf'
libbpf: failed to load BPF skeleton 'augmented_raw_syscalls_bpf': -13
0.000 systemd-oomd/959 openat(dfd: CWD, filename: 0xc0a2a2bd, flags: RDONLY|CLOEXEC) = 12
86.339 thermald/1234 openat(dfd: CWD, filename: 0xac000ba0) = 13
87.008 thermald/1234 openat(dfd: CWD, filename: 0xac000eb0) = 13
87.270 thermald/1234 openat(dfd: CWD, filename: 0xac000b70) = 13
89.657 thermald/1234 openat(dfd: CWD, filename: 0xac000eb0) = 13
^C
If I comment out the connect and sendto it doesn't build anymore,
whereas before it would continue with the other handlers:
CLANG /tmp/build/perf-tools-next/util/bpf_skel/.tmp/augmented_raw_syscalls.bpf.o
GENSKEL /tmp/build/perf-tools-next/util/bpf_skel/augmented_raw_syscalls.skel.h
CC /tmp/build/perf-tools-next/builtin-trace.o
builtin-trace.c: In function ‘cmd_trace’:
builtin-trace.c:4873:63: error: ‘struct <anonymous>’ has no member named ‘sys_enter_connect’; did you mean ‘sys_enter_openat’?
4873 | bpf_program__set_autoattach(trace.skel->progs.sys_enter_connect,
| ^~~~~~~~~~~~~~~~~
| sys_enter_openat
builtin-trace.c:4875:63: error: ‘struct <anonymous>’ has no member named ‘sys_enter_sendto’; did you mean ‘sys_enter_openat’?
4875 | bpf_program__set_autoattach(trace.skel->progs.sys_enter_sendto,
| ^~~~~~~~~~~~~~~~
| sys_enter_openat
make[3]: *** [/home/acme/git/perf-tools-next/tools/build/Makefile.build:97: /tmp/build/perf-tools-next/builtin-trace.o] Error 1
make[2]: *** [Makefile.perf:662: /tmp/build/perf-tools-next/perf-in.o] Error 2
make[1]: *** [Makefile.perf:238: sub-make] Error 2
make: *** [Makefile:113: install-bin] Error 2
make: Leaving directory '/home/acme/git/perf-tools-next/tools/perf'
[acme@quaco perf-tools-next]$
I.e. there is no need to refer to those explicitly; I think in the past
it just checked whether the program was present and, if so, attached it.
I'll try to fix this.
If I remove the explicit references in builtin-trace.c:
[root@quaco ~]# perf trace -e open* --max-events=10
0.000 thermald/1234 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:2/energy_uj") = 13
0.236 thermald/1234 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj") = 13
0.334 thermald/1234 openat(dfd: CWD, filename: "/sys/class/thermal/thermal_zone2/temp") = 13
9.092 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
259.212 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
497.464 gpm/1049 openat(dfd: CWD, filename: "/dev/tty0") = 4
509.044 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
509.559 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.pressure", flags: RDONLY|CLOEXEC) = 12
509.917 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.current", flags: RDONLY|CLOEXEC) = 12
510.111 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.min", flags: RDONLY|CLOEXEC) = 12
[root@quaco ~]#
Cool!
Some inception:
[root@quaco ~]# perf trace -e perf_event_open perf stat -e cycles,instructions,cache-misses sleep 1
0.000 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0 (PERF_COUNT_HW_CPU_CYCLES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
0.063 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x1 (PERF_COUNT_HW_INSTRUCTIONS), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
0.070 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x3 (PERF_COUNT_HW_CACHE_MISSES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5
Performance counter stats for 'sleep 1':
2,669,464 cycles
1,842,319 instructions # 0.69 insn per cycle
27,716 cache-misses
1.001948592 seconds time elapsed
0.000000000 seconds user
0.001657000 seconds sys
[root@quaco ~]#
I'm putting what I have in the tmp.perf-tools-next branch, will continue
later today.
- Arnaldo
On Thu, Aug 10, 2023 at 11:48:51AM -0700, Ian Rogers wrote:
> Previously a BPF event of augmented_raw_syscalls.c could be used to
> enable augmentation of syscalls by perf trace. As BPF events are no
> longer supported, switch to using a BPF skeleton which when attached
> explicitly opens the sysenter and sysexit tracepoints.
>
> The dump map is removed as debugging wasn't supported by the
> augmentation and bpf_printk can be used when necessary.
>
> Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the
> rename/migration to a BPF skeleton captures that this was the source.
there are still some stale references left:
[jolsa@krava perf]$ grep -r augmented_raw_syscalls.c
builtin-trace.c: * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
builtin-trace.c: * tools/perf/examples/bpf/augmented_raw_syscalls.c,
Documentation/perf-trace.txt: living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this
jirka
>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
> tools/perf/Makefile.perf | 1 +
> tools/perf/builtin-trace.c | 180 +++++++++++-------
> .../bpf_skel/augmented_raw_syscalls.bpf.c} | 27 +--
> 3 files changed, 131 insertions(+), 77 deletions(-)
> rename tools/perf/{examples/bpf/augmented_raw_syscalls.c => util/bpf_skel/augmented_raw_syscalls.bpf.c} (96%)
>
> diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
> index 6ec5079fd697..0e1597712b95 100644
> --- a/tools/perf/Makefile.perf
> +++ b/tools/perf/Makefile.perf
> @@ -1042,6 +1042,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
> SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
> SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h
> SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h
> +SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h
>
> $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
> $(Q)$(MKDIR) -p $@
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 59862467e781..8625fca42cd8 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -19,6 +19,9 @@
> #ifdef HAVE_LIBBPF_SUPPORT
> #include <bpf/bpf.h>
> #include <bpf/libbpf.h>
> +#ifdef HAVE_BPF_SKEL
> +#include "bpf_skel/augmented_raw_syscalls.skel.h"
> +#endif
> #endif
> #include "util/bpf_map.h"
> #include "util/rlimit.h"
> @@ -127,25 +130,19 @@ struct trace {
> struct syscalltbl *sctbl;
> struct {
> struct syscall *table;
> - struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
> - struct bpf_map *sys_enter,
> - *sys_exit;
> - } prog_array;
> struct {
> struct evsel *sys_enter,
> - *sys_exit,
> - *augmented;
> + *sys_exit,
> + *bpf_output;
> } events;
> - struct bpf_program *unaugmented_prog;
> } syscalls;
> - struct {
> - struct bpf_map *map;
> - } dump;
> +#ifdef HAVE_BPF_SKEL
> + struct augmented_raw_syscalls_bpf *skel;
> +#endif
> struct record_opts opts;
> struct evlist *evlist;
> struct machine *host;
> struct thread *current;
> - struct bpf_object *bpf_obj;
> struct cgroup *cgroup;
> u64 base_time;
> FILE *output;
> @@ -415,6 +412,7 @@ static int evsel__init_syscall_tp(struct evsel *evsel)
> if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
> evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
> return -ENOENT;
> +
> return 0;
> }
>
> @@ -2845,7 +2843,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
> if (thread)
> trace__fprintf_comm_tid(trace, thread, trace->output);
>
> - if (evsel == trace->syscalls.events.augmented) {
> + if (evsel == trace->syscalls.events.bpf_output) {
> int id = perf_evsel__sc_tp_uint(evsel, id, sample);
> struct syscall *sc = trace__syscall_info(trace, evsel, id);
>
> @@ -3278,24 +3276,16 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
> goto out;
> }
>
> -#ifdef HAVE_LIBBPF_SUPPORT
> -static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
> -{
> - if (trace->bpf_obj == NULL)
> - return NULL;
> -
> - return bpf_object__find_map_by_name(trace->bpf_obj, name);
> -}
> -
> +#ifdef HAVE_BPF_SKEL
> static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
> {
> struct bpf_program *pos, *prog = NULL;
> const char *sec_name;
>
> - if (trace->bpf_obj == NULL)
> + if (trace->skel->obj == NULL)
> return NULL;
>
> - bpf_object__for_each_program(pos, trace->bpf_obj) {
> + bpf_object__for_each_program(pos, trace->skel->obj) {
> sec_name = bpf_program__section_name(pos);
> if (sec_name && !strcmp(sec_name, name)) {
> prog = pos;
> @@ -3313,12 +3303,14 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
>
> if (prog_name == NULL) {
> char default_prog_name[256];
> - scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
> + scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s",
> + type, sc->name);
> prog = trace__find_bpf_program_by_title(trace, default_prog_name);
> if (prog != NULL)
> goto out_found;
> if (sc->fmt && sc->fmt->alias) {
> - scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
> + scnprintf(default_prog_name, sizeof(default_prog_name),
> + "tp/syscalls/sys_%s_%s", type, sc->fmt->alias);
> prog = trace__find_bpf_program_by_title(trace, default_prog_name);
> if (prog != NULL)
> goto out_found;
> @@ -3336,7 +3328,7 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
> pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
> prog_name, type, sc->name);
> out_unaugmented:
> - return trace->syscalls.unaugmented_prog;
> + return trace->skel->progs.syscall_unaugmented;
> }
>
> static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
> @@ -3353,13 +3345,21 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
> static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
> {
> struct syscall *sc = trace__syscall_info(trace, NULL, id);
> - return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
> +
> + if (sc)
> + return bpf_program__fd(sc->bpf_prog.sys_enter);
> +
> + return bpf_program__fd(trace->skel->progs.syscall_unaugmented);
> }
>
> static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
> {
> struct syscall *sc = trace__syscall_info(trace, NULL, id);
> - return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
> +
> + if (sc)
> + return bpf_program__fd(sc->bpf_prog.sys_exit);
> +
> + return bpf_program__fd(trace->skel->progs.syscall_unaugmented);
> }
>
> static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
> @@ -3384,7 +3384,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
> bool is_candidate = false;
>
> if (pair == NULL || pair == sc ||
> - pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
> + pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
> continue;
>
> for (field = sc->args, candidate_field = pair->args;
> @@ -3437,7 +3437,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
> */
> if (pair_prog == NULL) {
> pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
> - if (pair_prog == trace->syscalls.unaugmented_prog)
> + if (pair_prog == trace->skel->progs.syscall_unaugmented)
> goto next_candidate;
> }
>
> @@ -3452,8 +3452,8 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
>
> static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
> {
> - int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
> - map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
> + int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter);
> + int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit);
> int err = 0, key;
>
> for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
> @@ -3515,7 +3515,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
> * For now we're just reusing the sys_enter prog, and if it
> * already has an augmenter, we don't need to find one.
> */
> - if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
> + if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented)
> continue;
>
> /*
> @@ -3538,22 +3538,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
> break;
> }
>
> -
> return err;
> }
> -
> -#else // HAVE_LIBBPF_SUPPORT
> -static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
> - const char *name __maybe_unused)
> -{
> - return NULL;
> -}
> -
> -static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
> -{
> - return 0;
> -}
> -#endif // HAVE_LIBBPF_SUPPORT
> +#endif // HAVE_BPF_SKEL
>
> static int trace__set_ev_qualifier_filter(struct trace *trace)
> {
> @@ -3917,13 +3904,31 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> err = evlist__open(evlist);
> if (err < 0)
> goto out_error_open;
> +#ifdef HAVE_BPF_SKEL
> + {
> + struct perf_cpu cpu;
>
> + /*
> + * Set up the __augmented_syscalls__ BPF map to hold for each
> + * CPU the bpf-output event's file descriptor.
> + */
> + perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) {
> + bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__,
> + &cpu.cpu, sizeof(int),
> + xyarray__entry(trace->syscalls.events.bpf_output->core.fd,
> + cpu.cpu, 0),
> + sizeof(__u32), BPF_ANY);
> + }
> + }
> +#endif
> err = trace__set_filter_pids(trace);
> if (err < 0)
> goto out_error_mem;
>
> - if (trace->syscalls.prog_array.sys_enter)
> +#ifdef HAVE_BPF_SKEL
> + if (trace->skel->progs.sys_enter)
> trace__init_syscalls_bpf_prog_array_maps(trace);
> +#endif
>
> if (trace->ev_qualifier_ids.nr > 0) {
> err = trace__set_ev_qualifier_filter(trace);
> @@ -3956,9 +3961,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> if (err < 0)
> goto out_error_apply_filters;
>
> - if (trace->dump.map)
> - bpf_map__fprintf(trace->dump.map, trace->output);
> -
> err = evlist__mmap(evlist, trace->opts.mmap_pages);
> if (err < 0)
> goto out_error_mmap;
> @@ -4655,6 +4657,18 @@ static void trace__exit(struct trace *trace)
> zfree(&trace->perfconfig_events);
> }
>
> +#ifdef HAVE_BPF_SKEL
> +static int bpf__setup_bpf_output(struct evlist *evlist)
> +{
> + int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/");
> +
> + if (err)
> + pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n");
> +
> + return err;
> +}
> +#endif
> +
> int cmd_trace(int argc, const char **argv)
> {
> const char *trace_usage[] = {
> @@ -4686,7 +4700,6 @@ int cmd_trace(int argc, const char **argv)
> .max_stack = UINT_MAX,
> .max_events = ULONG_MAX,
> };
> - const char *map_dump_str = NULL;
> const char *output_name = NULL;
> const struct option trace_options[] = {
> OPT_CALLBACK('e', "event", &trace, "event",
> @@ -4720,9 +4733,6 @@ int cmd_trace(int argc, const char **argv)
> OPT_CALLBACK(0, "duration", &trace, "float",
> "show only events with duration > N.M ms",
> trace__set_duration),
> -#ifdef HAVE_LIBBPF_SUPPORT
> - OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
> -#endif
> OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
> OPT_INCR('v', "verbose", &verbose, "be more verbose"),
> OPT_BOOLEAN('T', "time", &trace.full_time,
> @@ -4849,16 +4859,55 @@ int cmd_trace(int argc, const char **argv)
> "cgroup monitoring only available in system-wide mode");
> }
>
> - err = -1;
> +#ifdef HAVE_BPF_SKEL
> + trace.skel = augmented_raw_syscalls_bpf__open();
> + if (!trace.skel) {
> + pr_debug("Failed to open augmented syscalls BPF skeleton");
> + } else {
> + /*
> + * Disable attaching the BPF programs except for sys_enter and
> + * sys_exit that tail call into this as necessary.
> + */
> + bpf_program__set_autoattach(trace.skel->progs.syscall_unaugmented,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_connect,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_sendto,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_open,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_openat,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_rename,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_renameat,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_perf_event_open,
> + /*autoattach=*/false);
> + bpf_program__set_autoattach(trace.skel->progs.sys_enter_clock_nanosleep,
> + /*autoattach=*/false);
> +
> + err = augmented_raw_syscalls_bpf__load(trace.skel);
>
> - if (map_dump_str) {
> - trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
> - if (trace.dump.map == NULL) {
> - pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
> - goto out;
> + if (err < 0) {
> + pr_debug("Failed to load augmented syscalls BPF skeleton\n");
> + } else {
> + augmented_raw_syscalls_bpf__attach(trace.skel);
> + trace__add_syscall_newtp(&trace);
> }
> }
>
> + err = bpf__setup_bpf_output(trace.evlist);
> + if (err) {
> + libbpf_strerror(err, bf, sizeof(bf));
> + pr_err("ERROR: Setup BPF output event failed: %s\n", bf);
> + goto out;
> + }
> + trace.syscalls.events.bpf_output = evlist__last(trace.evlist);
> + assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__"));
> +#endif
> + err = -1;
> +
> if (trace.trace_pgfaults) {
> trace.opts.sample_address = true;
> trace.opts.sample_time = true;
> @@ -4909,7 +4958,7 @@ int cmd_trace(int argc, const char **argv)
> * buffers that are being copied from kernel to userspace, think 'read'
> * syscall.
> */
> - if (trace.syscalls.events.augmented) {
> + if (trace.syscalls.events.bpf_output) {
> evlist__for_each_entry(trace.evlist, evsel) {
> bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
>
> @@ -4918,9 +4967,9 @@ int cmd_trace(int argc, const char **argv)
> goto init_augmented_syscall_tp;
> }
>
> - if (trace.syscalls.events.augmented->priv == NULL &&
> + if (trace.syscalls.events.bpf_output->priv == NULL &&
> strstr(evsel__name(evsel), "syscalls:sys_enter")) {
> - struct evsel *augmented = trace.syscalls.events.augmented;
> + struct evsel *augmented = trace.syscalls.events.bpf_output;
> if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
> evsel__init_augmented_syscall_tp_args(augmented))
> goto out;
> @@ -5025,5 +5074,8 @@ int cmd_trace(int argc, const char **argv)
> fclose(trace.output);
> out:
> trace__exit(&trace);
> +#ifdef HAVE_BPF_SKEL
> + augmented_raw_syscalls_bpf__destroy(trace.skel);
> +#endif
> return err;
> }
> diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> similarity index 96%
> rename from tools/perf/examples/bpf/augmented_raw_syscalls.c
> rename to tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> index 9a03189d33d3..70478b9460ee 100644
> --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
> +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> @@ -18,6 +18,8 @@
> #include <bpf/bpf_helpers.h>
> #include <linux/limits.h>
>
> +#define MAX_CPUS 4096
> +
> // FIXME: These should come from system headers
> typedef char bool;
> typedef int pid_t;
> @@ -34,7 +36,7 @@ struct __augmented_syscalls__ {
> __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
> __type(key, int);
> __type(value, __u32);
> - __uint(max_entries, __NR_CPUS__);
> + __uint(max_entries, MAX_CPUS);
> } __augmented_syscalls__ SEC(".maps");
>
> /*
> @@ -170,7 +172,7 @@ unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const
> return augmented_len;
> }
>
> -SEC("!raw_syscalls:unaugmented")
> +SEC("tp/raw_syscalls/sys_enter")
> int syscall_unaugmented(struct syscall_enter_args *args)
> {
> return 1;
> @@ -182,7 +184,7 @@ int syscall_unaugmented(struct syscall_enter_args *args)
> * on from there, reading the first syscall arg as a string, i.e. open's
> * filename.
> */
> -SEC("!syscalls:sys_enter_connect")
> +SEC("tp/syscalls/sys_enter_connect")
> int sys_enter_connect(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args = augmented_args_payload();
> @@ -201,7 +203,7 @@ int sys_enter_connect(struct syscall_enter_args *args)
> return augmented__output(args, augmented_args, len + socklen);
> }
>
> -SEC("!syscalls:sys_enter_sendto")
> +SEC("tp/syscalls/sys_enter_sendto")
> int sys_enter_sendto(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args = augmented_args_payload();
> @@ -220,7 +222,7 @@ int sys_enter_sendto(struct syscall_enter_args *args)
> return augmented__output(args, augmented_args, len + socklen);
> }
>
> -SEC("!syscalls:sys_enter_open")
> +SEC("tp/syscalls/sys_enter_open")
> int sys_enter_open(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args = augmented_args_payload();
> @@ -235,7 +237,7 @@ int sys_enter_open(struct syscall_enter_args *args)
> return augmented__output(args, augmented_args, len);
> }
>
> -SEC("!syscalls:sys_enter_openat")
> +SEC("tp/syscalls/sys_enter_openat")
> int sys_enter_openat(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args = augmented_args_payload();
> @@ -250,7 +252,7 @@ int sys_enter_openat(struct syscall_enter_args *args)
> return augmented__output(args, augmented_args, len);
> }
>
> -SEC("!syscalls:sys_enter_rename")
> +SEC("tp/syscalls/sys_enter_rename")
> int sys_enter_rename(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args = augmented_args_payload();
> @@ -267,7 +269,7 @@ int sys_enter_rename(struct syscall_enter_args *args)
> return augmented__output(args, augmented_args, len);
> }
>
> -SEC("!syscalls:sys_enter_renameat")
> +SEC("tp/syscalls/sys_enter_renameat")
> int sys_enter_renameat(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args = augmented_args_payload();
> @@ -295,7 +297,7 @@ struct perf_event_attr_size {
> __u32 size;
> };
>
> -SEC("!syscalls:sys_enter_perf_event_open")
> +SEC("tp/syscalls/sys_enter_perf_event_open")
> int sys_enter_perf_event_open(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args = augmented_args_payload();
> @@ -327,7 +329,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
> return 1; /* Failure: don't filter */
> }
>
> -SEC("!syscalls:sys_enter_clock_nanosleep")
> +SEC("tp/syscalls/sys_enter_clock_nanosleep")
> int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args = augmented_args_payload();
> @@ -358,7 +360,7 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
> return bpf_map_lookup_elem(pids, &pid) != NULL;
> }
>
> -SEC("raw_syscalls:sys_enter")
> +SEC("tp/raw_syscalls/sys_enter")
> int sys_enter(struct syscall_enter_args *args)
> {
> struct augmented_args_payload *augmented_args;
> @@ -371,7 +373,6 @@ int sys_enter(struct syscall_enter_args *args)
> * We'll add to this as we add augmented syscalls right after that
> * initial, non-augmented raw_syscalls:sys_enter payload.
> */
> - unsigned int len = sizeof(augmented_args->args);
>
> if (pid_filter__has(&pids_filtered, getpid()))
> return 0;
> @@ -393,7 +394,7 @@ int sys_enter(struct syscall_enter_args *args)
> return 0;
> }
>
> -SEC("raw_syscalls:sys_exit")
> +SEC("tp/raw_syscalls/sys_exit")
> int sys_exit(struct syscall_exit_args *args)
> {
> struct syscall_exit_args exit_args;
> --
> 2.41.0.640.ga95def55d0-goog
>
On Fri, Aug 11, 2023 at 9:09 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Aug 10, 2023 at 11:48:51AM -0700, Ian Rogers wrote:
> > Previously a BPF event of augmented_raw_syscalls.c could be used to
> > enable augmentation of syscalls by perf trace. As BPF events are no
> > longer supported, switch to using a BPF skeleton which when attached
> > explicitly opens the sysenter and sysexit tracepoints.
> >
> > The dump map is removed as debugging wasn't supported by the
> > augmentation and bpf_printk can be used when necessary.
> >
> > Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the
> > rename/migration to a BPF skeleton captures that this was the source.
>
> there's still some:
>
> [jolsa@krava perf]$ grep -r augmented_raw_syscalls.c
> builtin-trace.c: * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
> builtin-trace.c: * tools/perf/examples/bpf/augmented_raw_syscalls.c,
> Documentation/perf-trace.txt: living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this
Agreed, I'll double check, but the later patches remove these. I was
trying to keep this patch down to a minimal switch from one approach to
the other.
Thanks,
Ian
> jirka
>
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> > tools/perf/Makefile.perf | 1 +
> > tools/perf/builtin-trace.c | 180 +++++++++++-------
> > .../bpf_skel/augmented_raw_syscalls.bpf.c} | 27 +--
> > 3 files changed, 131 insertions(+), 77 deletions(-)
> > rename tools/perf/{examples/bpf/augmented_raw_syscalls.c => util/bpf_skel/augmented_raw_syscalls.bpf.c} (96%)
> >
> > diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
> > index 6ec5079fd697..0e1597712b95 100644
> > --- a/tools/perf/Makefile.perf
> > +++ b/tools/perf/Makefile.perf
> > @@ -1042,6 +1042,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
> > SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
> > SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h
> > SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h
> > +SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h
> >
> > $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
> > $(Q)$(MKDIR) -p $@
> > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> > index 59862467e781..8625fca42cd8 100644
> > --- a/tools/perf/builtin-trace.c
> > +++ b/tools/perf/builtin-trace.c
> > @@ -19,6 +19,9 @@
> > #ifdef HAVE_LIBBPF_SUPPORT
> > #include <bpf/bpf.h>
> > #include <bpf/libbpf.h>
> > +#ifdef HAVE_BPF_SKEL
> > +#include "bpf_skel/augmented_raw_syscalls.skel.h"
> > +#endif
> > #endif
> > #include "util/bpf_map.h"
> > #include "util/rlimit.h"
> > @@ -127,25 +130,19 @@ struct trace {
> > struct syscalltbl *sctbl;
> > struct {
> > struct syscall *table;
> > - struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
> > - struct bpf_map *sys_enter,
> > - *sys_exit;
> > - } prog_array;
> > struct {
> > struct evsel *sys_enter,
> > - *sys_exit,
> > - *augmented;
> > + *sys_exit,
> > + *bpf_output;
> > } events;
> > - struct bpf_program *unaugmented_prog;
> > } syscalls;
> > - struct {
> > - struct bpf_map *map;
> > - } dump;
> > +#ifdef HAVE_BPF_SKEL
> > + struct augmented_raw_syscalls_bpf *skel;
> > +#endif
> > struct record_opts opts;
> > struct evlist *evlist;
> > struct machine *host;
> > struct thread *current;
> > - struct bpf_object *bpf_obj;
> > struct cgroup *cgroup;
> > u64 base_time;
> > FILE *output;
> > @@ -415,6 +412,7 @@ static int evsel__init_syscall_tp(struct evsel *evsel)
> > if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
> > evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
> > return -ENOENT;
> > +
> > return 0;
> > }
> >
> > @@ -2845,7 +2843,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
> > if (thread)
> > trace__fprintf_comm_tid(trace, thread, trace->output);
> >
> > - if (evsel == trace->syscalls.events.augmented) {
> > + if (evsel == trace->syscalls.events.bpf_output) {
> > int id = perf_evsel__sc_tp_uint(evsel, id, sample);
> > struct syscall *sc = trace__syscall_info(trace, evsel, id);
> >
> > @@ -3278,24 +3276,16 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
> > goto out;
> > }
> >
> > -#ifdef HAVE_LIBBPF_SUPPORT
> > -static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
> > -{
> > - if (trace->bpf_obj == NULL)
> > - return NULL;
> > -
> > - return bpf_object__find_map_by_name(trace->bpf_obj, name);
> > -}
> > -
> > +#ifdef HAVE_BPF_SKEL
> > static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
> > {
> > struct bpf_program *pos, *prog = NULL;
> > const char *sec_name;
> >
> > - if (trace->bpf_obj == NULL)
> > + if (trace->skel->obj == NULL)
> > return NULL;
> >
> > - bpf_object__for_each_program(pos, trace->bpf_obj) {
> > + bpf_object__for_each_program(pos, trace->skel->obj) {
> > sec_name = bpf_program__section_name(pos);
> > if (sec_name && !strcmp(sec_name, name)) {
> > prog = pos;
> > @@ -3313,12 +3303,14 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
> >
> > if (prog_name == NULL) {
> > char default_prog_name[256];
> > - scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
> > + scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s",
> > + type, sc->name);
> > prog = trace__find_bpf_program_by_title(trace, default_prog_name);
> > if (prog != NULL)
> > goto out_found;
> > if (sc->fmt && sc->fmt->alias) {
> > - scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
> > + scnprintf(default_prog_name, sizeof(default_prog_name),
> > + "tp/syscalls/sys_%s_%s", type, sc->fmt->alias);
> > prog = trace__find_bpf_program_by_title(trace, default_prog_name);
> > if (prog != NULL)
> > goto out_found;
> > @@ -3336,7 +3328,7 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
> > pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
> > prog_name, type, sc->name);
> > out_unaugmented:
> > - return trace->syscalls.unaugmented_prog;
> > + return trace->skel->progs.syscall_unaugmented;
> > }
> >
> > static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
> > @@ -3353,13 +3345,21 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
> > static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
> > {
> > struct syscall *sc = trace__syscall_info(trace, NULL, id);
> > - return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
> > +
> > + if (sc)
> > + return bpf_program__fd(sc->bpf_prog.sys_enter);
> > +
> > + return bpf_program__fd(trace->skel->progs.syscall_unaugmented);
> > }
> >
> > static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
> > {
> > struct syscall *sc = trace__syscall_info(trace, NULL, id);
> > - return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
> > +
> > + if (sc)
> > + return bpf_program__fd(sc->bpf_prog.sys_exit);
> > +
> > + return bpf_program__fd(trace->skel->progs.syscall_unaugmented);
> > }
> >
> > static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
> > @@ -3384,7 +3384,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
> > bool is_candidate = false;
> >
> > if (pair == NULL || pair == sc ||
> > - pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
> > + pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
> > continue;
> >
> > for (field = sc->args, candidate_field = pair->args;
> > @@ -3437,7 +3437,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
> > */
> > if (pair_prog == NULL) {
> > pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
> > - if (pair_prog == trace->syscalls.unaugmented_prog)
> > + if (pair_prog == trace->skel->progs.syscall_unaugmented)
> > goto next_candidate;
> > }
> >
> > @@ -3452,8 +3452,8 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
> >
> > static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
> > {
> > - int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
> > - map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
> > + int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter);
> > + int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit);
> > int err = 0, key;
> >
> > for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
> > @@ -3515,7 +3515,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
> > * For now we're just reusing the sys_enter prog, and if it
> > * already has an augmenter, we don't need to find one.
> > */
> > - if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
> > + if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented)
> > continue;
> >
> > /*
> > @@ -3538,22 +3538,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
> > break;
> > }
> >
> > -
> > return err;
> > }
> > -
> > -#else // HAVE_LIBBPF_SUPPORT
> > -static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
> > - const char *name __maybe_unused)
> > -{
> > - return NULL;
> > -}
> > -
> > -static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
> > -{
> > - return 0;
> > -}
> > -#endif // HAVE_LIBBPF_SUPPORT
> > +#endif // HAVE_BPF_SKEL
> >
> > static int trace__set_ev_qualifier_filter(struct trace *trace)
> > {
> > @@ -3917,13 +3904,31 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> > err = evlist__open(evlist);
> > if (err < 0)
> > goto out_error_open;
> > +#ifdef HAVE_BPF_SKEL
> > + {
> > + struct perf_cpu cpu;
> >
> > + /*
> > + * Set up the __augmented_syscalls__ BPF map to hold for each
> > + * CPU the bpf-output event's file descriptor.
> > + */
> > + perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) {
> > + bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__,
> > + &cpu.cpu, sizeof(int),
> > + xyarray__entry(trace->syscalls.events.bpf_output->core.fd,
> > + cpu.cpu, 0),
> > + sizeof(__u32), BPF_ANY);
> > + }
> > + }
> > +#endif
> > err = trace__set_filter_pids(trace);
> > if (err < 0)
> > goto out_error_mem;
> >
> > - if (trace->syscalls.prog_array.sys_enter)
> > +#ifdef HAVE_BPF_SKEL
> > + if (trace->skel->progs.sys_enter)
> > trace__init_syscalls_bpf_prog_array_maps(trace);
> > +#endif
> >
> > if (trace->ev_qualifier_ids.nr > 0) {
> > err = trace__set_ev_qualifier_filter(trace);
> > @@ -3956,9 +3961,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
> > if (err < 0)
> > goto out_error_apply_filters;
> >
> > - if (trace->dump.map)
> > - bpf_map__fprintf(trace->dump.map, trace->output);
> > -
> > err = evlist__mmap(evlist, trace->opts.mmap_pages);
> > if (err < 0)
> > goto out_error_mmap;
> > @@ -4655,6 +4657,18 @@ static void trace__exit(struct trace *trace)
> > zfree(&trace->perfconfig_events);
> > }
> >
> > +#ifdef HAVE_BPF_SKEL
> > +static int bpf__setup_bpf_output(struct evlist *evlist)
> > +{
> > + int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/");
> > +
> > + if (err)
> > + pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n");
> > +
> > + return err;
> > +}
> > +#endif
> > +
> > int cmd_trace(int argc, const char **argv)
> > {
> > const char *trace_usage[] = {
> > @@ -4686,7 +4700,6 @@ int cmd_trace(int argc, const char **argv)
> > .max_stack = UINT_MAX,
> > .max_events = ULONG_MAX,
> > };
> > - const char *map_dump_str = NULL;
> > const char *output_name = NULL;
> > const struct option trace_options[] = {
> > OPT_CALLBACK('e', "event", &trace, "event",
> > @@ -4720,9 +4733,6 @@ int cmd_trace(int argc, const char **argv)
> > OPT_CALLBACK(0, "duration", &trace, "float",
> > "show only events with duration > N.M ms",
> > trace__set_duration),
> > -#ifdef HAVE_LIBBPF_SUPPORT
> > - OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
> > -#endif
> > OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
> > OPT_INCR('v', "verbose", &verbose, "be more verbose"),
> > OPT_BOOLEAN('T', "time", &trace.full_time,
> > @@ -4849,16 +4859,55 @@ int cmd_trace(int argc, const char **argv)
> > "cgroup monitoring only available in system-wide mode");
> > }
> >
> > - err = -1;
> > +#ifdef HAVE_BPF_SKEL
> > + trace.skel = augmented_raw_syscalls_bpf__open();
> > + if (!trace.skel) {
> > + pr_debug("Failed to open augmented syscalls BPF skeleton");
> > + } else {
> > + /*
> > + * Disable attaching the BPF programs except for sys_enter and
> > + * sys_exit that tail call into this as necessary.
> > + */
> > + bpf_program__set_autoattach(trace.skel->progs.syscall_unaugmented,
> > + /*autoattach=*/false);
> > + bpf_program__set_autoattach(trace.skel->progs.sys_enter_connect,
> > + /*autoattach=*/false);
> > + bpf_program__set_autoattach(trace.skel->progs.sys_enter_sendto,
> > + /*autoattach=*/false);
> > + bpf_program__set_autoattach(trace.skel->progs.sys_enter_open,
> > + /*autoattach=*/false);
> > + bpf_program__set_autoattach(trace.skel->progs.sys_enter_openat,
> > + /*autoattach=*/false);
> > + bpf_program__set_autoattach(trace.skel->progs.sys_enter_rename,
> > + /*autoattach=*/false);
> > + bpf_program__set_autoattach(trace.skel->progs.sys_enter_renameat,
> > + /*autoattach=*/false);
> > + bpf_program__set_autoattach(trace.skel->progs.sys_enter_perf_event_open,
> > + /*autoattach=*/false);
> > + bpf_program__set_autoattach(trace.skel->progs.sys_enter_clock_nanosleep,
> > + /*autoattach=*/false);
> > +
> > + err = augmented_raw_syscalls_bpf__load(trace.skel);
> >
> > - if (map_dump_str) {
> > - trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
> > - if (trace.dump.map == NULL) {
> > - pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
> > - goto out;
> > + if (err < 0) {
> > + pr_debug("Failed to load augmented syscalls BPF skeleton\n");
> > + } else {
> > + augmented_raw_syscalls_bpf__attach(trace.skel);
> > + trace__add_syscall_newtp(&trace);
> > }
> > }
> >
> > + err = bpf__setup_bpf_output(trace.evlist);
> > + if (err) {
> > + libbpf_strerror(err, bf, sizeof(bf));
> > + pr_err("ERROR: Setup BPF output event failed: %s\n", bf);
> > + goto out;
> > + }
> > + trace.syscalls.events.bpf_output = evlist__last(trace.evlist);
> > + assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__"));
> > +#endif
> > + err = -1;
> > +
> > if (trace.trace_pgfaults) {
> > trace.opts.sample_address = true;
> > trace.opts.sample_time = true;
> > @@ -4909,7 +4958,7 @@ int cmd_trace(int argc, const char **argv)
> > * buffers that are being copied from kernel to userspace, think 'read'
> > * syscall.
> > */
> > - if (trace.syscalls.events.augmented) {
> > + if (trace.syscalls.events.bpf_output) {
> > evlist__for_each_entry(trace.evlist, evsel) {
> > bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
> >
> > @@ -4918,9 +4967,9 @@ int cmd_trace(int argc, const char **argv)
> > goto init_augmented_syscall_tp;
> > }
> >
> > - if (trace.syscalls.events.augmented->priv == NULL &&
> > + if (trace.syscalls.events.bpf_output->priv == NULL &&
> > strstr(evsel__name(evsel), "syscalls:sys_enter")) {
> > - struct evsel *augmented = trace.syscalls.events.augmented;
> > + struct evsel *augmented = trace.syscalls.events.bpf_output;
> > if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
> > evsel__init_augmented_syscall_tp_args(augmented))
> > goto out;
> > @@ -5025,5 +5074,8 @@ int cmd_trace(int argc, const char **argv)
> > fclose(trace.output);
> > out:
> > trace__exit(&trace);
> > +#ifdef HAVE_BPF_SKEL
> > + augmented_raw_syscalls_bpf__destroy(trace.skel);
> > +#endif
> > return err;
> > }
> > diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> > similarity index 96%
> > rename from tools/perf/examples/bpf/augmented_raw_syscalls.c
> > rename to tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> > index 9a03189d33d3..70478b9460ee 100644
> > --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
> > +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> > @@ -18,6 +18,8 @@
> > #include <bpf/bpf_helpers.h>
> > #include <linux/limits.h>
> >
> > +#define MAX_CPUS 4096
> > +
> > // FIXME: These should come from system headers
> > typedef char bool;
> > typedef int pid_t;
> > @@ -34,7 +36,7 @@ struct __augmented_syscalls__ {
> > __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
> > __type(key, int);
> > __type(value, __u32);
> > - __uint(max_entries, __NR_CPUS__);
> > + __uint(max_entries, MAX_CPUS);
> > } __augmented_syscalls__ SEC(".maps");
> >
> > /*
> > @@ -170,7 +172,7 @@ unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const
> > return augmented_len;
> > }
> >
> > -SEC("!raw_syscalls:unaugmented")
> > +SEC("tp/raw_syscalls/sys_enter")
> > int syscall_unaugmented(struct syscall_enter_args *args)
> > {
> > return 1;
> > @@ -182,7 +184,7 @@ int syscall_unaugmented(struct syscall_enter_args *args)
> > * on from there, reading the first syscall arg as a string, i.e. open's
> > * filename.
> > */
> > -SEC("!syscalls:sys_enter_connect")
> > +SEC("tp/syscalls/sys_enter_connect")
> > int sys_enter_connect(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args = augmented_args_payload();
> > @@ -201,7 +203,7 @@ int sys_enter_connect(struct syscall_enter_args *args)
> > return augmented__output(args, augmented_args, len + socklen);
> > }
> >
> > -SEC("!syscalls:sys_enter_sendto")
> > +SEC("tp/syscalls/sys_enter_sendto")
> > int sys_enter_sendto(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args = augmented_args_payload();
> > @@ -220,7 +222,7 @@ int sys_enter_sendto(struct syscall_enter_args *args)
> > return augmented__output(args, augmented_args, len + socklen);
> > }
> >
> > -SEC("!syscalls:sys_enter_open")
> > +SEC("tp/syscalls/sys_enter_open")
> > int sys_enter_open(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args = augmented_args_payload();
> > @@ -235,7 +237,7 @@ int sys_enter_open(struct syscall_enter_args *args)
> > return augmented__output(args, augmented_args, len);
> > }
> >
> > -SEC("!syscalls:sys_enter_openat")
> > +SEC("tp/syscalls/sys_enter_openat")
> > int sys_enter_openat(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args = augmented_args_payload();
> > @@ -250,7 +252,7 @@ int sys_enter_openat(struct syscall_enter_args *args)
> > return augmented__output(args, augmented_args, len);
> > }
> >
> > -SEC("!syscalls:sys_enter_rename")
> > +SEC("tp/syscalls/sys_enter_rename")
> > int sys_enter_rename(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args = augmented_args_payload();
> > @@ -267,7 +269,7 @@ int sys_enter_rename(struct syscall_enter_args *args)
> > return augmented__output(args, augmented_args, len);
> > }
> >
> > -SEC("!syscalls:sys_enter_renameat")
> > +SEC("tp/syscalls/sys_enter_renameat")
> > int sys_enter_renameat(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args = augmented_args_payload();
> > @@ -295,7 +297,7 @@ struct perf_event_attr_size {
> > __u32 size;
> > };
> >
> > -SEC("!syscalls:sys_enter_perf_event_open")
> > +SEC("tp/syscalls/sys_enter_perf_event_open")
> > int sys_enter_perf_event_open(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args = augmented_args_payload();
> > @@ -327,7 +329,7 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args)
> > return 1; /* Failure: don't filter */
> > }
> >
> > -SEC("!syscalls:sys_enter_clock_nanosleep")
> > +SEC("tp/syscalls/sys_enter_clock_nanosleep")
> > int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args = augmented_args_payload();
> > @@ -358,7 +360,7 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
> > return bpf_map_lookup_elem(pids, &pid) != NULL;
> > }
> >
> > -SEC("raw_syscalls:sys_enter")
> > +SEC("tp/raw_syscalls/sys_enter")
> > int sys_enter(struct syscall_enter_args *args)
> > {
> > struct augmented_args_payload *augmented_args;
> > @@ -371,7 +373,6 @@ int sys_enter(struct syscall_enter_args *args)
> > * We'll add to this as we add augmented syscalls right after that
> > * initial, non-augmented raw_syscalls:sys_enter payload.
> > */
> > - unsigned int len = sizeof(augmented_args->args);
> >
> > if (pid_filter__has(&pids_filtered, getpid()))
> > return 0;
> > @@ -393,7 +394,7 @@ int sys_enter(struct syscall_enter_args *args)
> > return 0;
> > }
> >
> > -SEC("raw_syscalls:sys_exit")
> > +SEC("tp/raw_syscalls/sys_exit")
> > int sys_exit(struct syscall_exit_args *args)
> > {
> > struct syscall_exit_args exit_args;
> > --
> > 2.41.0.640.ga95def55d0-goog
> >
© 2016 - 2026 Red Hat, Inc.