The bpf_perf_event_aux_pause kfunc is introduced to pause and resume
the perf AUX trace from eBPF programs.

An eBPF program attached to a tracepoint (e.g., an ftrace tracepoint,
or a dynamic tracepoint created via uprobe or kprobe) can invoke
bpf_perf_event_aux_pause() to pause or resume the AUX trace. This is
useful for fine-grained tracing, limiting the AUX trace to the code
regions of interest.
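For illustration, a BPF program could pause the AUX trace on the
current CPU when a probed function is hit, roughly as below. This is a
minimal sketch: the map name, program name, and probed symbol are
examples only.

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>

  /* Declaration of the new kfunc. */
  extern int bpf_perf_event_aux_pause(void *p__map, __u64 flags,
                                      __u32 pause) __ksym;

  /* Perf event array holding the AUX-capable events, one per CPU. */
  struct {
          __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
          __uint(key_size, sizeof(int));
          __uint(value_size, sizeof(int));
  } aux_events SEC(".maps");

  SEC("kprobe/do_sys_openat2")
  int pause_aux(void *ctx)
  {
          /* pause == 1 pauses the AUX trace, pause == 0 resumes it. */
          bpf_perf_event_aux_pause(&aux_events, BPF_F_CURRENT_CPU, 1);
          return 0;
  }

  char LICENSE[] SEC("license") = "GPL";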
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
kernel/trace/bpf_trace.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3ae52978cae61a5d60b43c764d3e267bd32e1085..d024e0d0de4a9496e10e2b1a1fbe44434a824a0f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -704,6 +704,68 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_perf_event_aux_pause(void *p__map, u64 flags, u32 pause)
+{
+	struct bpf_map *map = p__map;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	unsigned int cpu = smp_processor_id();
+	u64 index = flags & BPF_F_INDEX_MASK;
+	struct bpf_event_entry *ee;
+	int ret = 0;
+
+	/* Disabling IRQ avoids race condition with perf event flows. */
+	guard(irqsave)();
+
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (index == BPF_F_CURRENT_CPU)
+		index = cpu;
+
+	if (unlikely(index >= array->map.max_entries)) {
+		ret = -E2BIG;
+		goto out;
+	}
+
+	ee = READ_ONCE(array->ptrs[index]);
+	if (!ee) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if (!has_aux(ee->event)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	perf_event_aux_pause(ee->event, pause);
+out:
+	return ret;
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(perf_event_kfunc_set_ids)
+BTF_ID_FLAGS(func, bpf_perf_event_aux_pause, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(perf_event_kfunc_set_ids)
+
+static const struct btf_kfunc_id_set bpf_perf_event_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set = &perf_event_kfunc_set_ids,
+};
+
+static int __init bpf_perf_event_kfuncs_init(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC,
+					 &bpf_perf_event_kfunc_set);
+}
+
+late_initcall(bpf_perf_event_kfuncs_init);
+
static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
struct bpf_nested_pt_regs {
struct pt_regs regs[3];
--
2.34.1
On Fri, 2025-07-18 at 16:25 +0100, Leo Yan wrote:
[...]
> +__bpf_kfunc int bpf_perf_event_aux_pause(void *p__map, u64 flags, u32 pause)
> +{
> +	struct bpf_map *map = p__map;
> +	struct bpf_array *array = container_of(map, struct bpf_array, map);
The verifier makes sure that p__map is a non-null pointer to an object
of type bpf_map, but it does not guarantee that the object is an
instance of bpf_array.
You need to check map->map_type, the same way bpf_arena_alloc_pages()
does.
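Something along these lines (untested sketch, mirroring the check in
bpf_arena_alloc_pages()) at the top of the function should do:

	if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
		return -EINVAL;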
> +	unsigned int cpu = smp_processor_id();
> +	u64 index = flags & BPF_F_INDEX_MASK;
> +	struct bpf_event_entry *ee;
> +	int ret = 0;
> +
> +	/* Disabling IRQ avoids race condition with perf event flows. */
> +	guard(irqsave)();
> +
> +	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (index == BPF_F_CURRENT_CPU)
> +		index = cpu;
> +
> +	if (unlikely(index >= array->map.max_entries)) {
> +		ret = -E2BIG;
> +		goto out;
> +	}
> +
> +	ee = READ_ONCE(array->ptrs[index]);
> +	if (!ee) {
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +
> +	if (!has_aux(ee->event)) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	perf_event_aux_pause(ee->event, pause);
> +out:
> +	return ret;
> +}
[...]
Hi Eduard,
On Mon, Jul 21, 2025 at 03:38:59PM -0700, Eduard Zingerman wrote:
> On Fri, 2025-07-18 at 16:25 +0100, Leo Yan wrote:
>
> [...]
>
> > +__bpf_kfunc int bpf_perf_event_aux_pause(void *p__map, u64 flags, u32 pause)
> > +{
> > +	struct bpf_map *map = p__map;
> > +	struct bpf_array *array = container_of(map, struct bpf_array, map);
>
> The verifier makes sure that p__map is a non-null pointer to an object
> of type bpf_map, but it does not guarantee that the object is an
> instance of bpf_array.
> You need to check map->map_type, the same way bpf_arena_alloc_pages()
> does.
Makes sense. Will do.
> > +	unsigned int cpu = smp_processor_id();
> > +	u64 index = flags & BPF_F_INDEX_MASK;
> > +	struct bpf_event_entry *ee;
> > +	int ret = 0;
> > +
> > +	/* Disabling IRQ avoids race condition with perf event flows. */
> > +	guard(irqsave)();
> > +
> > +	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
> > +		ret = -EINVAL;
> > +		goto out;
> > +	}
> > +
> > +	if (index == BPF_F_CURRENT_CPU)
> > +		index = cpu;
> > +
> > +	if (unlikely(index >= array->map.max_entries)) {
> > +		ret = -E2BIG;
> > +		goto out;
> > +	}
> > +
> > +	ee = READ_ONCE(array->ptrs[index]);
> > +	if (!ee) {
> > +		ret = -ENOENT;
> > +		goto out;
> > +	}
> > +
> > +	if (!has_aux(ee->event)) {
> > +		ret = -EINVAL;
> > +		goto out;
> > +	}
I will refactor a bit to remove "goto out" and return the error codes
directly.
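For example, relying on guard(irqsave)() to restore the IRQ state on
every return path, the body could become something like this (untested
sketch):

	guard(irqsave)();

	if (unlikely(flags & ~BPF_F_INDEX_MASK))
		return -EINVAL;

	if (index == BPF_F_CURRENT_CPU)
		index = cpu;

	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	if (!has_aux(ee->event))
		return -EINVAL;

	perf_event_aux_pause(ee->event, pause);
	return 0;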
Thanks for the review, and for your suggestion in the other reply.
Leo
> > +
> > +	perf_event_aux_pause(ee->event, pause);
> > +out:
> > +	return ret;
> > +}
>
> [...]