[v2] perf trace: Enhanced augmentation for pointer arguments

[PATCH v2 09/10] perf trace: Collect augmented data using BPF

Posted by Howard Chu 1 year, 5 months ago

Include trace_augment.h for TRACE_AUG_MAX_BUF, so that BPF reads at most
TRACE_AUG_MAX_BUF bytes from a buffer.

Determine what type of argument and how many bytes to read from user space, using the
value in the beauty_map. This is the relation of parameter type and its corresponding
value in the beauty map, and how many bytes we read eventually:

string: 1                          -> size of string (till null)
struct: size of struct             -> size of struct
buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)

After reading from user space, we output the augmented data using
bpf_perf_event_output().

If the struct augmenter, augment_sys_enter(), fails, we fall back to
using bpf_tail_call().

I have to make the payload 6 times the size of augmented_arg, to pass the
BPF verifier.

Signed-off-by: Howard Chu <howardchu95@gmail.com>
---
 .../bpf_skel/augmented_raw_syscalls.bpf.c     | 114 +++++++++++++++++-
 1 file changed, 113 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index c7b9f80239c7..d665af449b1b 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -7,6 +7,8 @@
  */
 
 #include "vmlinux.h"
+#include "../trace_augment.h"
+
 #include <bpf/bpf_helpers.h>
 #include <linux/limits.h>
 
@@ -135,6 +137,25 @@ struct augmented_args_tmp {
 	__uint(max_entries, 1);
 } augmented_args_tmp SEC(".maps");
 
+struct beauty_payload_enter {
+	struct syscall_enter_args args;
+	struct augmented_arg aug_args[6];
+};
+
+struct beauty_map_enter {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int);
+	__type(value, __u32[6]);
+	__uint(max_entries, 512);
+} beauty_map_enter SEC(".maps");
+
+struct beauty_payload_enter_map {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, int);
+	__type(value, struct beauty_payload_enter);
+	__uint(max_entries, 1);
+} beauty_payload_enter_map SEC(".maps");
+
 static inline struct augmented_args_payload *augmented_args_payload(void)
 {
 	int key = 0;
@@ -147,6 +168,11 @@ static inline int augmented__output(void *ctx, struct augmented_args_payload *ar
 	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
 }
 
+static inline int augmented__beauty_output(void *ctx, void *data, int len)
+{
+	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len);
+}
+
 static inline
 unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
 {
@@ -399,6 +425,91 @@ static inline bool task_can_trace()
 	return true;
 }
 
+static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
+{
+	bool augmented, do_output = false;
+	int zero = 0, size, aug_size, index, output = 0,
+	    value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+	unsigned int nr, *beauty_map;
+	struct beauty_payload_enter *payload;
+	void *arg, *payload_offset;
+
+	/* fall back to do predefined tail call */
+	if (args == NULL)
+		return 1;
+
+	/* use syscall number to get beauty_map entry */
+	nr             = (__u32)args->syscall_nr;
+	beauty_map     = bpf_map_lookup_elem(&beauty_map_enter, &nr);
+
+	/* set up payload for output */
+	payload        = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero);
+	payload_offset = (void *)&payload->aug_args;
+
+	if (beauty_map == NULL || payload == NULL)
+		return 1;
+
+	/* copy the sys_enter header, which has the syscall_nr */
+	__builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args));
+
+	/*
+	 * Determine what type of argument and how many bytes to read from user space, using the
+	 * value in the beauty_map. This is the relation of parameter type and its corresponding
+	 * value in the beauty map, and how many bytes we read eventually:
+	 *
+	 * string: 1			      -> size of string
+	 * struct: size of struct	      -> size of struct
+	 * buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)
+	 */
+	for (int i = 0; i < 6; i++) {
+		arg = (void *)args->args[i];
+		augmented = false;
+		size = beauty_map[i];
+		aug_size = size; /* size of the augmented data read from user space */
+
+		if (size == 0 || arg == NULL)
+			continue;
+
+		if (size == 1) { /* string */
+			aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg);
+			/* minimum of 0 to pass the verifier */
+			if (aug_size < 0)
+				aug_size = 0;
+
+			augmented = true;
+		} else if (size > 0 && size <= value_size) { /* struct */
+			if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
+				augmented = true;
+		} else if (size < 0 && size >= -6) { /* buffer */
+			index = -(size + 1);
+			aug_size = args->args[index];
+
+			if (aug_size > TRACE_AUG_MAX_BUF)
+				aug_size = TRACE_AUG_MAX_BUF;
+
+			if (aug_size > 0) {
+				if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
+					augmented = true;
+			}
+		}
+
+		/* write data to payload */
+		if (augmented) {
+			int written = offsetof(struct augmented_arg, value) + aug_size;
+
+			((struct augmented_arg *)payload_offset)->size = aug_size;
+			output += written;
+			payload_offset += written;
+			do_output = true;
+		}
+	}
+
+	if (!do_output)
+		return 1;
+
+	return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
+}
+
 SEC("tp/raw_syscalls/sys_enter")
 int sys_enter(struct syscall_enter_args *args)
 {
@@ -427,7 +538,8 @@ int sys_enter(struct syscall_enter_args *args)
 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
 	 * unaugmented tracepoint payload.
 	 */
-	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
+	if (augment_sys_enter(args, &augmented_args->args))
+		bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
 
 	// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
 	return 0;
-- 
2.45.2

Re: [PATCH v2 09/10] perf trace: Collect augmented data using BPF

Posted by Arnaldo Carvalho de Melo 1 year, 5 months ago

On Thu, Aug 15, 2024 at 09:36:25AM +0800, Howard Chu wrote:
> +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> @@ -427,7 +538,8 @@ int sys_enter(struct syscall_enter_args *args)
>  	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
>  	 * unaugmented tracepoint payload.
>  	 */
> -	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> +	if (augment_sys_enter(args, &augmented_args->args))
> +		bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);

We shouldn't do that, instead we keep doing

	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);

And userspace will setup the syscalls_sys_enter map adding the generic
pointer collector (augment_sys_enter) for syscalls that have pointers
_and_ are not serviced by a pre-existing, specialized handler, this way
we keep the ones we have already and that already take into account
pretty printing network addresses based on the network family, knows how
to pretty print flags (the perf_event_open, etc).

I'll try to do this now.

- Arnaldo

>  
>  	// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
>  	return 0;
> -- 
> 2.45.2

Re: [PATCH v2 09/10] perf trace: Collect augmented data using BPF

Posted by Arnaldo Carvalho de Melo 1 year, 5 months ago

On Fri, Aug 23, 2024 at 10:24:09AM -0300, Arnaldo Carvalho de Melo wrote:
> On Thu, Aug 15, 2024 at 09:36:25AM +0800, Howard Chu wrote:
> > +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> > @@ -427,7 +538,8 @@ int sys_enter(struct syscall_enter_args *args)
> >  	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
> >  	 * unaugmented tracepoint payload.
> >  	 */
> > -	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> > +	if (augment_sys_enter(args, &augmented_args->args))
> > +		bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> 
> We shouldn't do that, instead we keep doing
> 
> 	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> 
> And userspace will setup the syscalls_sys_enter map adding the generic
> pointer collector (augment_sys_enter) for syscalls that have pointers
> _and_ are not serviced by a pre-existing, specialized handler, this way
> we keep the ones we have already and that already take into account
> pretty printing network addresses based on the network family, knows how
> to pretty print flags (the perf_event_open, etc).
> 
> I'll try to do this now.

So, step by step, first this, and then hook it to the syscalls that:

1) have a pointer to collect and no handler, i.e. as a last step in
assigning the functions to be tail called from the syscalls BPF map.

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 37ca96e130a5862d..a909880bd25e51d1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1007,9 +1007,11 @@ static size_t btf_enum_scnprintf(const struct btf_type *type, struct btf *btf, c
 	return 0;
 }
 
-static size_t trace__btf_scnprintf(struct trace *trace, struct syscall_arg_fmt *arg_fmt, char *bf,
+static size_t trace__btf_scnprintf(struct trace *trace, struct syscall_arg *arg, char *bf,
 				   size_t size, int val, char *type)
 {
+	struct syscall_arg_fmt *arg_fmt = arg->fmt;
+
 	if (trace->btf == NULL)
 		return 0;
 
@@ -1029,7 +1031,7 @@ static size_t trace__btf_scnprintf(struct trace *trace, struct syscall_arg_fmt *
 }
 
 #else // HAVE_LIBBPF_SUPPORT
-static size_t trace__btf_scnprintf(struct trace *trace __maybe_unused, struct syscall_arg_fmt *arg_fmt __maybe_unused,
+static size_t trace__btf_scnprintf(struct trace *trace __maybe_unused, struct syscall_arg *arg __maybe_unused,
 				   char *bf __maybe_unused, size_t size __maybe_unused, int val __maybe_unused,
 				   char *type __maybe_unused)
 {
@@ -2284,7 +2286,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 			if (trace->show_arg_names)
 				printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
 
-			btf_printed = trace__btf_scnprintf(trace, &sc->arg_fmt[arg.idx], bf + printed,
+			btf_printed = trace__btf_scnprintf(trace, &arg, bf + printed,
 							   size - printed, val, field->type);
 			if (btf_printed) {
 				printed += btf_printed;
@@ -2987,7 +2989,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
 		if (trace->show_arg_names)
 			printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
 
-		btf_printed = trace__btf_scnprintf(trace, arg, bf + printed, size - printed, val, field->type);
+		btf_printed = trace__btf_scnprintf(trace, &syscall_arg, bf + printed, size - printed, val, field->type);
 		if (btf_printed) {
 			printed += btf_printed;
 			continue;

Re: [PATCH v2 09/10] perf trace: Collect augmented data using BPF

Posted by Arnaldo Carvalho de Melo 1 year, 5 months ago

On Fri, Aug 23, 2024 at 10:38:21AM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Aug 23, 2024 at 10:24:09AM -0300, Arnaldo Carvalho de Melo wrote:
> > On Thu, Aug 15, 2024 at 09:36:25AM +0800, Howard Chu wrote:
> > > +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> > > @@ -427,7 +538,8 @@ int sys_enter(struct syscall_enter_args *args)
> > >  	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
> > >  	 * unaugmented tracepoint payload.
> > >  	 */
> > > -	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> > > +	if (augment_sys_enter(args, &augmented_args->args))
> > > +		bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> > 
> > We shouldn't do that, instead we keep doing
> > 
> > 	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> > 
> > And userspace will setup the syscalls_sys_enter map adding the generic
> > pointer collector (augment_sys_enter) for syscalls that have pointers
> > _and_ are not serviced by a pre-existing, specialized handler, this way
> > we keep the ones we have already and that already take into account
> > pretty printing network addresses based on the network family, knows how
> > to pretty print flags (the perf_event_open, etc).
> > 
> > I'll try to do this now.
> 
> So, step by step, first this, and then hook it to the syscalls that:
> 
> 1) have a pointer to collect and no handler, i.e. as a last step in
> assigning the functions to be tail called from the syscalls BPF map.
> 

Sorry, sent the wrong patch, this one is the right one:

diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index f29a8dfca044649b..4c8176f9a77ca5bb 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -398,7 +398,11 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
 	return bpf_map_lookup_elem(pids, &pid) != NULL;
 }
 
-static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
+// Will be tail called for syscalls with pointers, the setup is done
+// in builtin-trace.c as the fallback for syscalls not handled by specialed code,
+// like the network ones that need to look at one field to then decide how to
+// pretty print a network specific address, etc.
+int sys_enter_augmented(struct syscall_enter_args *args)
 {
 	bool augmented, do_output = false;
 	int zero = 0, size, aug_size, index, output = 0,
@@ -480,7 +484,7 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
 	if (!do_output)
 		return 1;
 
-	return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
+	return augmented__beauty_output(args, payload, sizeof(struct syscall_enter_args) + output);
 }
 
 SEC("tp/raw_syscalls/sys_enter")
@@ -511,8 +515,7 @@ int sys_enter(struct syscall_enter_args *args)
 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
 	 * unaugmented tracepoint payload.
 	 */
-	if (augment_sys_enter(args, &augmented_args->args))
-		bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
+	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
 
 	// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
 	return 0;

Re: [PATCH v2 09/10] perf trace: Collect augmented data using BPF

Posted by Arnaldo Carvalho de Melo 1 year, 5 months ago

On Fri, Aug 23, 2024 at 10:42:21AM -0300, Arnaldo Carvalho de Melo wrote:
> On Fri, Aug 23, 2024 at 10:38:21AM -0300, Arnaldo Carvalho de Melo wrote:
> > On Fri, Aug 23, 2024 at 10:24:09AM -0300, Arnaldo Carvalho de Melo wrote:
> > > On Thu, Aug 15, 2024 at 09:36:25AM +0800, Howard Chu wrote:
> > > > +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
> > > > @@ -427,7 +538,8 @@ int sys_enter(struct syscall_enter_args *args)
> > > >  	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
> > > >  	 * unaugmented tracepoint payload.
> > > >  	 */
> > > > -	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> > > > +	if (augment_sys_enter(args, &augmented_args->args))
> > > > +		bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> > > 
> > > We shouldn't do that, instead we keep doing
> > > 
> > > 	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
> > > 
> > > And userspace will setup the syscalls_sys_enter map adding the generic
> > > pointer collector (augment_sys_enter) for syscalls that have pointers
> > > _and_ are not serviced by a pre-existing, specialized handler, this way
> > > we keep the ones we have already and that already take into account
> > > pretty printing network addresses based on the network family, knows how
> > > to pretty print flags (the perf_event_open, etc).
> > > 
> > > I'll try to do this now.
> > 
> > So, step by step, first this, and then hook it to the syscalls that:
> > 
> > 1) have a pointer to collect and no handler, i.e. as a last step in
> > assigning the functions to be tail called from the syscalls BPF map.
> > 

Sorry, I'm wrong, we can use the generic collector and just in userspace
wire it up to the more specialized pretty printer :-)

- Arnaldo