kernel/bpf/verifier.c | 6 +- tools/testing/selftests/bpf/Makefile | 3 +- .../selftests/bpf/prog_tests/ringbuf.c | 50 +++++++++++++++ .../bpf/progs/test_ringbuf_bpf_to_bpf.c | 64 +++++++++++++++++++ 4 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c
Right now there exists prog produce / userspace consume and userspace
produce / prog consume support. But it is also useful to have prog
produce / prog consume.
For example, we want to track the latency overhead of cpumap in
production. Since we need to store enqueue timestamps somewhere and
cpumap is MPSC, we need an MPSC data structure to shadow cpumap. BPF
ringbuf is such a data structure. Rather than reimplement (possibly
poorly) a custom ringbuffer in BPF, extend the existing interface to
allow the final quadrant of ringbuf usecases to be filled. Note we
ignore userspace to userspace use case - there is no need to involve
kernel for that.
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
kernel/bpf/verifier.c | 6 +-
tools/testing/selftests/bpf/Makefile | 3 +-
.../selftests/bpf/prog_tests/ringbuf.c | 50 +++++++++++++++
.../bpf/progs/test_ringbuf_bpf_to_bpf.c | 64 +++++++++++++++++++
4 files changed, 120 insertions(+), 3 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 53d0556fbbf3..56bfe559f228 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9142,7 +9142,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_ringbuf_query &&
func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
func_id != BPF_FUNC_ringbuf_submit_dynptr &&
- func_id != BPF_FUNC_ringbuf_discard_dynptr)
+ func_id != BPF_FUNC_ringbuf_discard_dynptr &&
+ func_id != BPF_FUNC_user_ringbuf_drain)
goto error;
break;
case BPF_MAP_TYPE_USER_RINGBUF:
@@ -9276,7 +9277,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
goto error;
break;
case BPF_FUNC_user_ringbuf_drain:
- if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
+ if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF &&
+ map->map_type != BPF_MAP_TYPE_RINGBUF)
goto error;
break;
case BPF_FUNC_get_stackid:
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 9905e3739dd0..233109843d4d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -503,7 +503,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \
+ test_ringbuf_bpf_to_bpf.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index da430df45aa4..3e7955573ac5 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -17,6 +17,7 @@
#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
#include "test_ringbuf_write.lskel.h"
+#include "test_ringbuf_bpf_to_bpf.lskel.h"
#define EDONE 7777
@@ -497,6 +498,53 @@ static void ringbuf_map_key_subtest(void)
test_ringbuf_map_key_lskel__destroy(skel_map_key);
}
+static void ringbuf_bpf_to_bpf_subtest(void)
+{
+ struct test_ringbuf_bpf_to_bpf_lskel *skel;
+ int err, i;
+
+ skel = test_ringbuf_bpf_to_bpf_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "test_ringbuf_bpf_to_bpf_lskel__open"))
+ return;
+
+ skel->maps.ringbuf.max_entries = getpagesize();
+ skel->bss->pid = getpid();
+
+ err = test_ringbuf_bpf_to_bpf_lskel__load(skel);
+ if (!ASSERT_OK(err, "test_ringbuf_bpf_to_bpf_lskel__load"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd, NULL, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ err = test_ringbuf_bpf_to_bpf_lskel__attach(skel);
+ if (!ASSERT_OK(err, "test_ringbuf_bpf_to_bpf_lskel__attach"))
+ goto cleanup_ringbuf;
+
+ /* Produce N_SAMPLES samples in the ring buffer by calling getpid() */
+ skel->bss->value = SAMPLE_VALUE;
+ for (i = 0; i < N_SAMPLES; i++)
+ syscall(__NR_getpgid);
+
+ /* Trigger bpf-side consumption */
+ syscall(__NR_prctl);
+
+ /* Check correct number samples were consumed */
+ if (!ASSERT_EQ(skel->bss->round_tripped, N_SAMPLES, "round_tripped"))
+ goto cleanup_ringbuf;
+
+ /* Check all samples were consumed */
+ err = ring_buffer__consume(ringbuf);
+ if (!ASSERT_EQ(err, 0, "rb_consume"))
+ goto cleanup_ringbuf;
+
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_bpf_to_bpf_lskel__destroy(skel);
+}
+
void test_ringbuf(void)
{
if (test__start_subtest("ringbuf"))
@@ -507,4 +555,6 @@ void test_ringbuf(void)
ringbuf_map_key_subtest();
if (test__start_subtest("ringbuf_write"))
ringbuf_write_subtest();
+ if (test__start_subtest("ringbuf_bpf_to_bpf"))
+ ringbuf_bpf_to_bpf_subtest();
}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c b/tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c
new file mode 100644
index 000000000000..378154922024
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <unistd.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct sample {
+ long value;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+int pid = 0;
+long value = 0;
+int round_tripped = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_bpf_to_bpf_produce(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample)
+ return 0;
+ sample->value = value;
+
+ bpf_ringbuf_submit(sample, 0);
+ return 0;
+}
+
+static long consume_cb(struct bpf_dynptr *dynptr, void *context)
+{
+ struct sample *sample = NULL;
+
+ sample = bpf_dynptr_data(dynptr, 0, sizeof(*sample));
+ if (!sample)
+ return 0;
+
+ if (sample->value == value)
+ round_tripped++;
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_prctl")
+int test_ringbuf_bpf_to_bpf_consume(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid != pid)
+ return 0;
+
+ bpf_user_ringbuf_drain(&ringbuf, consume_cb, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.46.0
On Mon, Sep 9, 2024 at 5:55 PM Daniel Xu <dxu@dxuuu.xyz> wrote: > > Right now there exists prog produce / userspace consume and userspace > produce / prog consume support. But it is also useful to have prog > produce / prog consume. > > For example, we want to track the latency overhead of cpumap in > production. Since we need to store enqueue timestamps somewhere and > cpumap is MPSC, we need an MPSC data structure to shadow cpumap. BPF > ringbuf is such a data structure. Rather than reimplement (possibly > poorly) a custom ringbuffer in BPF, extend the existing interface to > allow the final quadrant of ringbuf usecases to be filled. Note we > ignore userspace to userspace use case - there is no need to involve > kernel for that. > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > --- > kernel/bpf/verifier.c | 6 +- > tools/testing/selftests/bpf/Makefile | 3 +- > .../selftests/bpf/prog_tests/ringbuf.c | 50 +++++++++++++++ > .../bpf/progs/test_ringbuf_bpf_to_bpf.c | 64 +++++++++++++++++++ > 4 files changed, 120 insertions(+), 3 deletions(-) > create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 53d0556fbbf3..56bfe559f228 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -9142,7 +9142,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > func_id != BPF_FUNC_ringbuf_query && > func_id != BPF_FUNC_ringbuf_reserve_dynptr && > func_id != BPF_FUNC_ringbuf_submit_dynptr && > - func_id != BPF_FUNC_ringbuf_discard_dynptr) > + func_id != BPF_FUNC_ringbuf_discard_dynptr && > + func_id != BPF_FUNC_user_ringbuf_drain) > goto error; > break; > case BPF_MAP_TYPE_USER_RINGBUF: > @@ -9276,7 +9277,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > goto error; > break; > case BPF_FUNC_user_ringbuf_drain: > - if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) > + if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF && > + map->map_type != BPF_MAP_TYPE_RINGBUF) > goto error; I think it should work. Andrii, do you see any issues with such use? > break; > case BPF_FUNC_get_stackid: > diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile > index 9905e3739dd0..233109843d4d 100644 > --- a/tools/testing/selftests/bpf/Makefile > +++ b/tools/testing/selftests/bpf/Makefile > @@ -503,7 +503,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ > LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ > trace_printk.c trace_vprintk.c map_ptr_kern.c \ > core_kern.c core_kern_overflow.c test_ringbuf.c \ > - test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c > + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \ > + test_ringbuf_bpf_to_bpf.c Do you need it to be lskel ? Regular skels are either to debug. Also pls split selftest into a separate patch. > > # Generate both light skeleton and libbpf skeleton for these > LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ > diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c > index da430df45aa4..3e7955573ac5 100644 > --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c > +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c > @@ -17,6 +17,7 @@ > #include "test_ringbuf_n.lskel.h" > #include "test_ringbuf_map_key.lskel.h" > #include "test_ringbuf_write.lskel.h" > +#include "test_ringbuf_bpf_to_bpf.lskel.h" > > #define EDONE 7777 > > @@ -497,6 +498,53 @@ static void ringbuf_map_key_subtest(void) > test_ringbuf_map_key_lskel__destroy(skel_map_key); > } > > +static void ringbuf_bpf_to_bpf_subtest(void) > +{ > + struct test_ringbuf_bpf_to_bpf_lskel *skel; > + int err, i; > + > + skel = test_ringbuf_bpf_to_bpf_lskel__open(); > + if (!ASSERT_OK_PTR(skel, "test_ringbuf_bpf_to_bpf_lskel__open")) > + return; > + > + skel->maps.ringbuf.max_entries = getpagesize(); > + skel->bss->pid = getpid(); > + > + err = test_ringbuf_bpf_to_bpf_lskel__load(skel); > + if (!ASSERT_OK(err, "test_ringbuf_bpf_to_bpf_lskel__load")) > + goto cleanup; > + > + ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd, NULL, NULL, NULL); > + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) > + goto cleanup; > + > + err = test_ringbuf_bpf_to_bpf_lskel__attach(skel); > + if (!ASSERT_OK(err, "test_ringbuf_bpf_to_bpf_lskel__attach")) > + goto cleanup_ringbuf; > + > + /* Produce N_SAMPLES samples in the ring buffer by calling getpid() */ > + skel->bss->value = SAMPLE_VALUE; > + for (i = 0; i < N_SAMPLES; i++) > + syscall(__NR_getpgid); > + > + /* Trigger bpf-side consumption */ > + syscall(__NR_prctl); This might conflict with other selftests that run in parallel. Just load the skel and bpf_prog_run(prog_fd). No need to attach anywhere in the kernel. pw-bot: cr
On Tue, Sep 10, 2024 at 11:36:36AM GMT, Alexei Starovoitov wrote: > On Mon, Sep 9, 2024 at 5:55 PM Daniel Xu <dxu@dxuuu.xyz> wrote: > > > > Right now there exists prog produce / userspace consume and userspace > > produce / prog consume support. But it is also useful to have prog > > produce / prog consume. > > > > For example, we want to track the latency overhead of cpumap in > > production. Since we need to store enqueue timestamps somewhere and > > cpumap is MPSC, we need an MPSC data structure to shadow cpumap. BPF > > ringbuf is such a data structure. Rather than reimplement (possibly > > poorly) a custom ringbuffer in BPF, extend the existing interface to > > allow the final quadrant of ringbuf usecases to be filled. Note we > > ignore userspace to userspace use case - there is no need to involve > > kernel for that. > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > --- > > kernel/bpf/verifier.c | 6 +- > > tools/testing/selftests/bpf/Makefile | 3 +- > > .../selftests/bpf/prog_tests/ringbuf.c | 50 +++++++++++++++ > > .../bpf/progs/test_ringbuf_bpf_to_bpf.c | 64 +++++++++++++++++++ > > 4 files changed, 120 insertions(+), 3 deletions(-) > > create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c > > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > > index 53d0556fbbf3..56bfe559f228 100644 > > --- a/kernel/bpf/verifier.c > > +++ b/kernel/bpf/verifier.c > > @@ -9142,7 +9142,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > > func_id != BPF_FUNC_ringbuf_query && > > func_id != BPF_FUNC_ringbuf_reserve_dynptr && > > func_id != BPF_FUNC_ringbuf_submit_dynptr && > > - func_id != BPF_FUNC_ringbuf_discard_dynptr) > > + func_id != BPF_FUNC_ringbuf_discard_dynptr && > > + func_id != BPF_FUNC_user_ringbuf_drain) > > goto error; > > break; > > case BPF_MAP_TYPE_USER_RINGBUF: > > @@ -9276,7 +9277,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > > goto error; > > break; > > case BPF_FUNC_user_ringbuf_drain: > > - if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) > > + if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF && > > + map->map_type != BPF_MAP_TYPE_RINGBUF) > > goto error; > > I think it should work. > > Andrii, > > do you see any issues with such use? > > > break; > > case BPF_FUNC_get_stackid: > > diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile > > index 9905e3739dd0..233109843d4d 100644 > > --- a/tools/testing/selftests/bpf/Makefile > > +++ b/tools/testing/selftests/bpf/Makefile > > @@ -503,7 +503,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ > > LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ > > trace_printk.c trace_vprintk.c map_ptr_kern.c \ > > core_kern.c core_kern_overflow.c test_ringbuf.c \ > > - test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c > > + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \ > > + test_ringbuf_bpf_to_bpf.c > > Do you need it to be lskel ? > > Regular skels are either to debug. I'm actually unsure the difference, still. But all the other tests in the file were using lskel so I just copy/pasted. > > Also pls split selftest into a separate patch. Ack. > > > > > # Generate both light skeleton and libbpf skeleton for these > > LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ > > diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c > > index da430df45aa4..3e7955573ac5 100644 > > --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c > > +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c > > @@ -17,6 +17,7 @@ > > #include "test_ringbuf_n.lskel.h" > > #include "test_ringbuf_map_key.lskel.h" > > #include "test_ringbuf_write.lskel.h" > > +#include "test_ringbuf_bpf_to_bpf.lskel.h" > > > > #define EDONE 7777 > > > > @@ -497,6 +498,53 @@ static void ringbuf_map_key_subtest(void) > > test_ringbuf_map_key_lskel__destroy(skel_map_key); > > } > > > > +static void ringbuf_bpf_to_bpf_subtest(void) > > +{ > > + struct test_ringbuf_bpf_to_bpf_lskel *skel; > > + int err, i; > > + > > + skel = test_ringbuf_bpf_to_bpf_lskel__open(); > > + if (!ASSERT_OK_PTR(skel, "test_ringbuf_bpf_to_bpf_lskel__open")) > > + return; > > + > > + skel->maps.ringbuf.max_entries = getpagesize(); > > + skel->bss->pid = getpid(); > > + > > + err = test_ringbuf_bpf_to_bpf_lskel__load(skel); > > + if (!ASSERT_OK(err, "test_ringbuf_bpf_to_bpf_lskel__load")) > > + goto cleanup; > > + > > + ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd, NULL, NULL, NULL); > > + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) > > + goto cleanup; > > + > > + err = test_ringbuf_bpf_to_bpf_lskel__attach(skel); > > + if (!ASSERT_OK(err, "test_ringbuf_bpf_to_bpf_lskel__attach")) > > + goto cleanup_ringbuf; > > + > > + /* Produce N_SAMPLES samples in the ring buffer by calling getpid() */ > > + skel->bss->value = SAMPLE_VALUE; > > + for (i = 0; i < N_SAMPLES; i++) > > + syscall(__NR_getpgid); > > + > > + /* Trigger bpf-side consumption */ > > + syscall(__NR_prctl); > > This might conflict with other selftests that run in parallel. > > Just load the skel and bpf_prog_run(prog_fd). > No need to attach anywhere in the kernel. Ack. > > pw-bot: cr
On Tue, Sep 10, 2024 at 11:36 AM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Mon, Sep 9, 2024 at 5:55 PM Daniel Xu <dxu@dxuuu.xyz> wrote: > > > > Right now there exists prog produce / userspace consume and userspace > > produce / prog consume support. But it is also useful to have prog > > produce / prog consume. > > > > For example, we want to track the latency overhead of cpumap in > > production. Since we need to store enqueue timestamps somewhere and > > cpumap is MPSC, we need an MPSC data structure to shadow cpumap. BPF > > ringbuf is such a data structure. Rather than reimplement (possibly > > poorly) a custom ringbuffer in BPF, extend the existing interface to > > allow the final quadrant of ringbuf usecases to be filled. Note we > > ignore userspace to userspace use case - there is no need to involve > > kernel for that. > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > --- > > kernel/bpf/verifier.c | 6 +- > > tools/testing/selftests/bpf/Makefile | 3 +- > > .../selftests/bpf/prog_tests/ringbuf.c | 50 +++++++++++++++ > > .../bpf/progs/test_ringbuf_bpf_to_bpf.c | 64 +++++++++++++++++++ > > 4 files changed, 120 insertions(+), 3 deletions(-) > > create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c > > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > > index 53d0556fbbf3..56bfe559f228 100644 > > --- a/kernel/bpf/verifier.c > > +++ b/kernel/bpf/verifier.c > > @@ -9142,7 +9142,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > > func_id != BPF_FUNC_ringbuf_query && > > func_id != BPF_FUNC_ringbuf_reserve_dynptr && > > func_id != BPF_FUNC_ringbuf_submit_dynptr && > > - func_id != BPF_FUNC_ringbuf_discard_dynptr) > > + func_id != BPF_FUNC_ringbuf_discard_dynptr && > > + func_id != BPF_FUNC_user_ringbuf_drain) > > goto error; > > break; > > case BPF_MAP_TYPE_USER_RINGBUF: > > @@ -9276,7 +9277,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > > goto error; > > break; > > case BPF_FUNC_user_ringbuf_drain: > > - if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) > > + if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF && > > + map->map_type != BPF_MAP_TYPE_RINGBUF) > > goto error; > > I think it should work. > > Andrii, > > do you see any issues with such use? > Not from a quick glance. Both ringbufs have the same memory layout, so user_ringbuf_drain() should probably work fine for normal BPF ringbuf (and either way bpf_user_ringbuf_drain() has to protect from malicious user space, so its code is pretty unassuming). We should make it very explicit, though, that the user is responsible for making sure that bpf_user_ringbuf_drain() will not be called simultaneously in two threads, kernel or user space. Also, Daniel, can you please make sure that dynptr we return for each sample is read-only? We shouldn't let consumer BPF program ability to corrupt ringbuf record headers (accidentally or otherwise). And as a thought exercise. I wonder what would it take to have an open-coded iterator returning these read-only dynptrs for each consumed record? Maybe we already have all the pieces together. So consider looking into that as well. P.S. And yeah "user_" part in helper name is kind of unfortunate given it will work for both ringbufs. Can/should we add some sort of alias for this helper so it can be used with both bpf_user_ringbuf_drain() and bpf_ringbuf_drain() names? > > break; > > case BPF_FUNC_get_stackid: > > diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile > > index 9905e3739dd0..233109843d4d 100644 > > --- a/tools/testing/selftests/bpf/Makefile > > +++ b/tools/testing/selftests/bpf/Makefile > > @@ -503,7 +503,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ > > LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ > > trace_printk.c trace_vprintk.c map_ptr_kern.c \ > > core_kern.c core_kern_overflow.c test_ringbuf.c \ > > - test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c > > + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \ > > + test_ringbuf_bpf_to_bpf.c [...]
On Tue, Sep 10, 2024 at 01:41:41PM GMT, Andrii Nakryiko wrote: > On Tue, Sep 10, 2024 at 11:36 AM Alexei Starovoitov > <alexei.starovoitov@gmail.com> wrote: > > > > On Mon, Sep 9, 2024 at 5:55 PM Daniel Xu <dxu@dxuuu.xyz> wrote: > > > > > > Right now there exists prog produce / userspace consume and userspace > > > produce / prog consume support. But it is also useful to have prog > > > produce / prog consume. > > > > > > For example, we want to track the latency overhead of cpumap in > > > production. Since we need to store enqueue timestamps somewhere and > > > cpumap is MPSC, we need an MPSC data structure to shadow cpumap. BPF > > > ringbuf is such a data structure. Rather than reimplement (possibly > > > poorly) a custom ringbuffer in BPF, extend the existing interface to > > > allow the final quadrant of ringbuf usecases to be filled. Note we > > > ignore userspace to userspace use case - there is no need to involve > > > kernel for that. > > > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > > --- > > > kernel/bpf/verifier.c | 6 +- > > > tools/testing/selftests/bpf/Makefile | 3 +- > > > .../selftests/bpf/prog_tests/ringbuf.c | 50 +++++++++++++++ > > > .../bpf/progs/test_ringbuf_bpf_to_bpf.c | 64 +++++++++++++++++++ > > > 4 files changed, 120 insertions(+), 3 deletions(-) > > > create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c > > > > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > > > index 53d0556fbbf3..56bfe559f228 100644 > > > --- a/kernel/bpf/verifier.c > > > +++ b/kernel/bpf/verifier.c > > > @@ -9142,7 +9142,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > > > func_id != BPF_FUNC_ringbuf_query && > > > func_id != BPF_FUNC_ringbuf_reserve_dynptr && > > > func_id != BPF_FUNC_ringbuf_submit_dynptr && > > > - func_id != BPF_FUNC_ringbuf_discard_dynptr) > > > + func_id != BPF_FUNC_ringbuf_discard_dynptr && > > > + func_id != BPF_FUNC_user_ringbuf_drain) > > > goto error; > > > break; > > > case BPF_MAP_TYPE_USER_RINGBUF: > > > @@ -9276,7 +9277,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > > > goto error; > > > break; > > > case BPF_FUNC_user_ringbuf_drain: > > > - if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) > > > + if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF && > > > + map->map_type != BPF_MAP_TYPE_RINGBUF) > > > goto error; > > > > I think it should work. > > > > Andrii, > > > > do you see any issues with such use? > > > > Not from a quick glance. Both ringbufs have the same memory layout, so > user_ringbuf_drain() should probably work fine for normal BPF ringbuf > (and either way bpf_user_ringbuf_drain() has to protect from malicious > user space, so its code is pretty unassuming). > > We should make it very explicit, though, that the user is responsible > for making sure that bpf_user_ringbuf_drain() will not be called > simultaneously in two threads, kernel or user space. I see an atomic_try_cmpxchg() protecting the drain. So it should be safe, right? What are they supposed to expect? > > Also, Daniel, can you please make sure that dynptr we return for each > sample is read-only? We shouldn't let consumer BPF program ability to > corrupt ringbuf record headers (accidentally or otherwise). Sure. > > And as a thought exercise. I wonder what would it take to have an > open-coded iterator returning these read-only dynptrs for each > consumed record? Maybe we already have all the pieces together. So > consider looking into that as well. > > P.S. And yeah "user_" part in helper name is kind of unfortunate given > it will work for both ringbufs. Can/should we add some sort of alias > for this helper so it can be used with both bpf_user_ringbuf_drain() > and bpf_ringbuf_drain() names? You mean register a new helper that shares the impl? Easy enough, but I thought we didn't want to add more uapi helpers. Thanks, Daniel
On Tue, Sep 10, 2024 at 2:07 PM Daniel Xu <dxu@dxuuu.xyz> wrote: > > On Tue, Sep 10, 2024 at 01:41:41PM GMT, Andrii Nakryiko wrote: > > On Tue, Sep 10, 2024 at 11:36 AM Alexei Starovoitov > > <alexei.starovoitov@gmail.com> wrote: > > > > > > On Mon, Sep 9, 2024 at 5:55 PM Daniel Xu <dxu@dxuuu.xyz> wrote: > > > > > > > > Right now there exists prog produce / userspace consume and userspace > > > > produce / prog consume support. But it is also useful to have prog > > > > produce / prog consume. > > > > > > > > For example, we want to track the latency overhead of cpumap in > > > > production. Since we need to store enqueue timestamps somewhere and > > > > cpumap is MPSC, we need an MPSC data structure to shadow cpumap. BPF > > > > ringbuf is such a data structure. Rather than reimplement (possibly > > > > poorly) a custom ringbuffer in BPF, extend the existing interface to > > > > allow the final quadrant of ringbuf usecases to be filled. Note we > > > > ignore userspace to userspace use case - there is no need to involve > > > > kernel for that. > > > > > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > > > --- > > > > kernel/bpf/verifier.c | 6 +- > > > > tools/testing/selftests/bpf/Makefile | 3 +- > > > > .../selftests/bpf/prog_tests/ringbuf.c | 50 +++++++++++++++ > > > > .../bpf/progs/test_ringbuf_bpf_to_bpf.c | 64 +++++++++++++++++++ > > > > 4 files changed, 120 insertions(+), 3 deletions(-) > > > > create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_bpf_to_bpf.c > > > > > > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > > > > index 53d0556fbbf3..56bfe559f228 100644 > > > > --- a/kernel/bpf/verifier.c > > > > +++ b/kernel/bpf/verifier.c > > > > @@ -9142,7 +9142,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > > > > func_id != BPF_FUNC_ringbuf_query && > > > > func_id != BPF_FUNC_ringbuf_reserve_dynptr && > > > > func_id != BPF_FUNC_ringbuf_submit_dynptr && > > > > - func_id != BPF_FUNC_ringbuf_discard_dynptr) > > > > + func_id != BPF_FUNC_ringbuf_discard_dynptr && > > > > + func_id != BPF_FUNC_user_ringbuf_drain) > > > > goto error; > > > > break; > > > > case BPF_MAP_TYPE_USER_RINGBUF: > > > > @@ -9276,7 +9277,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > > > > goto error; > > > > break; > > > > case BPF_FUNC_user_ringbuf_drain: > > > > - if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) > > > > + if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF && > > > > + map->map_type != BPF_MAP_TYPE_RINGBUF) > > > > goto error; > > > > > > I think it should work. > > > > > > Andrii, > > > > > > do you see any issues with such use? > > > > > > > Not from a quick glance. Both ringbufs have the same memory layout, so > > user_ringbuf_drain() should probably work fine for normal BPF ringbuf > > (and either way bpf_user_ringbuf_drain() has to protect from malicious > > user space, so its code is pretty unassuming). > > > > We should make it very explicit, though, that the user is responsible > > for making sure that bpf_user_ringbuf_drain() will not be called > > simultaneously in two threads, kernel or user space. > > I see an atomic_try_cmpxchg() protecting the drain. So it should be > safe, right? What are they supposed to expect? User space can ignore rb->busy and start consuming in parallel. Ok, given we had this atomic_try_cmpxchg() it was rather OK for user ringbuf, but it's not so great for BPF ringbuf, way too easy to screw up... > > > > > Also, Daniel, can you please make sure that dynptr we return for each > > sample is read-only? We shouldn't let consumer BPF program ability to > > corrupt ringbuf record headers (accidentally or otherwise). > > Sure. > > > > > And as a thought exercise. I wonder what would it take to have an > > open-coded iterator returning these read-only dynptrs for each > > consumed record? Maybe we already have all the pieces together. So > > consider looking into that as well. > > > > P.S. And yeah "user_" part in helper name is kind of unfortunate given > > it will work for both ringbufs. Can/should we add some sort of alias > > for this helper so it can be used with both bpf_user_ringbuf_drain() > > and bpf_ringbuf_drain() names? > > You mean register a new helper that shares the impl? Easy enough, but I > thought we didn't want to add more uapi helpers. No, not a new helper. Just an alternative name for it, "bpf_ringbuf_drain()". Might not be worth doing, I haven't checked what would be the best way to do this, can't tell right now. > > Thanks, > Daniel
© 2016 - 2024 Red Hat, Inc.