The bpf_perf_event_aux_pause kfunc will be used to pause or resume the
Perf AUX area trace.
An example use-case is attaching eBPF to Ftrace tracepoints. When a
tracepoint is hit, the associated eBPF program will be executed. The
eBPF program can invoke bpf_perf_event_aux_pause() to pause or resume
AUX trace. This is useful for fine-grained tracing by combining
Perf and eBPF.
This commit implements the bpf_perf_event_aux_pause kfunc and makes it
pass the eBPF verifier.
Signed-off-by: Leo Yan <leo.yan@arm.com>
---
include/uapi/linux/bpf.h | 21 ++++++++++++++++
kernel/bpf/verifier.c | 2 ++
kernel/trace/bpf_trace.c | 52 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 75 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4162afc6b5d0..678278c91ce2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5795,6 +5795,26 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * long bpf_perf_event_aux_pause(struct bpf_map *map, u64 flags, u32 pause)
+ * Description
+ * Pause or resume an AUX area trace associated with the perf event.
+ *
+ * The *flags* argument is specified as the key value for
+ * retrieving event pointer from the passed *map*.
+ *
+ * The *pause* argument controls AUX trace pause or resume.
+ * Non-zero values (true) are to pause the AUX trace and the zero
+ * value (false) is for re-enabling the AUX trace.
+ * Return
+ * 0 on success.
+ *
+ * **-ENOENT** if the event is not found in the events map.
+ *
+ * **-E2BIG** if the event index passed in the *flags* parameter
+ * is out-of-range of the map.
+ *
+ * **-EINVAL** if an invalid value is passed in *flags*.
*/
#define ___BPF_FUNC_MAPPER(FN, ctx...) \
FN(unspec, 0, ##ctx) \
@@ -6009,6 +6029,7 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
+ FN(perf_event_aux_pause, 212, ##ctx) \
/* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 09f7fa635f67..1f3acd8a7de3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9315,6 +9315,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_perf_event_output &&
func_id != BPF_FUNC_skb_output &&
func_id != BPF_FUNC_perf_event_read_value &&
+ func_id != BPF_FUNC_perf_event_aux_pause &&
func_id != BPF_FUNC_xdp_output)
goto error;
break;
@@ -9443,6 +9444,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_perf_event_read:
case BPF_FUNC_perf_event_output:
case BPF_FUNC_perf_event_read_value:
+ case BPF_FUNC_perf_event_aux_pause:
case BPF_FUNC_skb_output:
case BPF_FUNC_xdp_output:
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 949a3870946c..a3b857f6cab4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -617,6 +617,56 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+BPF_CALL_3(bpf_perf_event_aux_pause, struct bpf_map *, map, u64, flags,
+ u32, pause)
+{
+ unsigned long irq_flags;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ unsigned int cpu = smp_processor_id();
+ u64 index = flags & BPF_F_INDEX_MASK;
+ struct bpf_event_entry *ee;
+ int ret = 0;
+
+ /*
+ * Disabling interrupts avoids scheduling and race condition with
+ * perf event enabling and disabling flow.
+ */
+ local_irq_save(irq_flags);
+
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (index == BPF_F_CURRENT_CPU)
+ index = cpu;
+ if (unlikely(index >= array->map.max_entries)) {
+ ret = -E2BIG;
+ goto out;
+ }
+
+ ee = READ_ONCE(array->ptrs[index]);
+ if (!ee) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ perf_event_aux_pause(ee->event, pause);
+
+out:
+ local_irq_restore(irq_flags);
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_perf_event_aux_pause_proto = {
+ .func = bpf_perf_event_aux_pause,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_sample_data *sd)
@@ -1565,6 +1615,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_find_vma_proto;
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
+ case BPF_FUNC_perf_event_aux_pause:
+ return &bpf_perf_event_aux_pause_proto;
default:
return bpf_base_func_proto(func_id, prog);
}
--
2.34.1
On 12/15/24 11:34 AM, Leo Yan wrote:
> The bpf_perf_event_aux_pause kfunc will be used to control the Perf AUX
> area to pause or resume.
>
> An example use-case is attaching eBPF to Ftrace tracepoints. When a
> tracepoint is hit, the associated eBPF program will be executed. The
> eBPF program can invoke bpf_perf_event_aux_pause() to pause or resume
> AUX trace. This is useful for fine-grained tracing by combining
> Perf and eBPF.
>
> This commit implements the bpf_perf_event_aux_pause kfunc, and make it
> pass the eBPF verifier.
The subject and commit message mention implementing a kfunc,
but you actually implemented a uapi helper. Please implement a kfunc
instead (search for __bpf_kfunc in the kernel/bpf directory).
>
> Signed-off-by: Leo Yan <leo.yan@arm.com>
> ---
> include/uapi/linux/bpf.h | 21 ++++++++++++++++
> kernel/bpf/verifier.c | 2 ++
> kernel/trace/bpf_trace.c | 52 ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 75 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4162afc6b5d0..678278c91ce2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5795,6 +5795,26 @@ union bpf_attr {
> * 0 on success.
> *
> * **-ENOENT** if the bpf_local_storage cannot be found.
> + *
> + * long bpf_perf_event_aux_pause(struct bpf_map *map, u64 flags, u32 pause)
> + * Description
> + * Pause or resume an AUX area trace associated to the perf event.
> + *
> + * The *flags* argument is specified as the key value for
> + * retrieving event pointer from the passed *map*.
> + *
> + * The *pause* argument controls AUX trace pause or resume.
> + * Non-zero values (true) are to pause the AUX trace and the zero
> + * value (false) is for re-enabling the AUX trace.
> + * Return
> + * 0 on success.
> + *
> + * **-ENOENT** if not found event in the events map.
> + *
> + * **-E2BIG** if the event index passed in the *flags* parameter
> + * is out-of-range of the map.
> + *
> + * **-EINVAL** if the flags passed is an invalid value.
> */
> #define ___BPF_FUNC_MAPPER(FN, ctx...) \
> FN(unspec, 0, ##ctx) \
> @@ -6009,6 +6029,7 @@ union bpf_attr {
> FN(user_ringbuf_drain, 209, ##ctx) \
> FN(cgrp_storage_get, 210, ##ctx) \
> FN(cgrp_storage_delete, 211, ##ctx) \
> + FN(perf_event_aux_pause, 212, ##ctx) \
> /* */
>
> /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
>
[...]
Hi Yonghong, Really sorry for the long delay. Now I am restarting this work. On Mon, Dec 16, 2024 at 09:21:15AM -0800, Yonghong Song wrote: > On 12/15/24 11:34 AM, Leo Yan wrote: > > The bpf_perf_event_aux_pause kfunc will be used to control the Perf AUX > > area to pause or resume. > > > > An example use-case is attaching eBPF to Ftrace tracepoints. When a > > tracepoint is hit, the associated eBPF program will be executed. The > > eBPF program can invoke bpf_perf_event_aux_pause() to pause or resume > > AUX trace. This is useful for fine-grained tracing by combining > > Perf and eBPF. > > > > This commit implements the bpf_perf_event_aux_pause kfunc, and make it > > pass the eBPF verifier. > > The subject and commit message mentions to implement a kfunc, > but actually you implemented a uapi helper. Please implement a kfunc > instead (searching __bpf_kfunc in kernel/bpf directory). After some research, my understanding is that kfunc is flexible for exposing APIs via BTF, whereas BPF_CALL is typically used for core BPF features - such as accessing BPF maps. Coming back to this patch: it exposes a function with the following definition: int bpf_perf_event_aux_pause(struct bpf_map *map, u64 flags, u32 pause); I'm not certain whether using __bpf_kfunc is appropriate here, or if I should stick to BPF_CALL to ensure support for accessing bpf_map pointers? Thanks, Leo
On 7/14/25 10:45 AM, Leo Yan wrote: > Hi Yonghong, > > Really sorry for the long delay. Now I am restarting this work. > > On Mon, Dec 16, 2024 at 09:21:15AM -0800, Yonghong Song wrote: >> On 12/15/24 11:34 AM, Leo Yan wrote: >>> The bpf_perf_event_aux_pause kfunc will be used to control the Perf AUX >>> area to pause or resume. >>> >>> An example use-case is attaching eBPF to Ftrace tracepoints. When a >>> tracepoint is hit, the associated eBPF program will be executed. The >>> eBPF program can invoke bpf_perf_event_aux_pause() to pause or resume >>> AUX trace. This is useful for fine-grained tracing by combining >>> Perf and eBPF. >>> >>> This commit implements the bpf_perf_event_aux_pause kfunc, and make it >>> pass the eBPF verifier. >> The subject and commit message mentions to implement a kfunc, >> but actually you implemented a uapi helper. Please implement a kfunc >> instead (searching __bpf_kfunc in kernel/bpf directory). > After some research, my understanding is that kfunc is flexible for > exposing APIs via BTF, whereas BPF_CALL is typically used for core BPF > features - such as accessing BPF maps. > > Coming back to this patch: it exposes a function with the following > definition: > > int bpf_perf_event_aux_pause(struct bpf_map *map, u64 flags, u32 pause); > > I'm not certain whether using __bpf_kfunc is appropriate here, or if I > should stick to BPF_CALL to ensure support for accessing bpf_map > pointers? Using helpers (BPF_CALL) is not an option as the whole bpf ecosystem moves to kfunc mechanism. You can certainly use kfunc with 'struct bpf_map *' as the argument. For example the following kfunc: __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) in kernel/bpf/map_iter.c > > Thanks, > Leo
Hi Yonghong, On Tue, Jul 15, 2025 at 10:12:02AM -0700, Yonghong Song wrote: [...] > > I'm not certain whether using __bpf_kfunc is appropriate here, or if I > > should stick to BPF_CALL to ensure support for accessing bpf_map > > pointers? > > Using helpers (BPF_CALL) is not an option as the whole bpf ecosystem > moves to kfunc mechanism. You can certainly use kfunc with 'struct bpf_map *' > as the argument. For example the following kfunc: > __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) > in kernel/bpf/map_iter.c Thanks a lot for suggestion. I followed the idea to refactor the patch with kfunc, see the new version: https://lore.kernel.org/linux-perf-users/20250718-perf_aux_pause_resume_bpf_rebase-v2-0-992557b8fb16@arm.com/T/#m27a72255c93fa672e164cb87a322b979fe8f9408 Just clarify one thing, I defined the kfunc in new patch: int bpf_perf_event_aux_pause(void *p__map, u64 flags, u32 pause) Unlike your suggestion, I defined the first parameter as "void *p__map" (I refers to bpf_arena_alloc_pages()) rather than "struct bpf_map *map". This is because the BPF program will pass a variable from the map section, rather than passing a map pointer. TBH, I do not watch closely the BPF mailing list, so I may not be fully following the conventions. If anything is incorrect, please correct it as needed. Thank you, Leo
On Fri, 2025-07-18 at 16:38 +0100, Leo Yan wrote:
[...]
> Just clarify one thing, I defined the kfunc in new patch:
>
> int bpf_perf_event_aux_pause(void *p__map, u64 flags, u32 pause)
>
> Unlike your suggestion, I defined the first parameter as "void
> *p__map" (I refers to bpf_arena_alloc_pages()) rather than
> "struct bpf_map *map". This is because the BPF program will pass a
> variable from the map section, rather than passing a map pointer.
This is correct,
see commit 8d94f1357c00 ("bpf: Recognize '__map' suffix in kfunc arguments")
[...]
© 2016 - 2025 Red Hat, Inc.