To ultimately allow bpf programs attached to perf events to completely
suppress all of the effects of a perf event overflow (rather than just the
sample output, as they do today), call bpf_overflow_handler() from
__perf_event_overflow() directly rather than modifying struct perf_event's
overflow_handler. Return the bpf program's return value from
bpf_overflow_handler() so that __perf_event_overflow() knows how to
proceed. Remove the now unnecessary orig_overflow_handler from struct
perf_event.
This patch is solely a refactoring and results in no behavior change.
Signed-off-by: Kyle Huey <khuey@kylehuey.com>
Suggested-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Song Liu <song@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/perf_event.h | 6 +-----
kernel/events/core.c | 28 +++++++++++++++-------------
2 files changed, 16 insertions(+), 18 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d2a15c0c6f8a..c7f54fd74d89 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -810,7 +810,6 @@ struct perf_event {
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;
#ifdef CONFIG_BPF_SYSCALL
- perf_overflow_handler_t orig_overflow_handler;
struct bpf_prog *prog;
u64 bpf_cookie;
#endif
@@ -1357,10 +1356,7 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
#ifdef CONFIG_BPF_SYSCALL
static inline bool uses_default_overflow_handler(struct perf_event *event)
{
- if (likely(is_default_overflow_handler(event)))
- return true;
-
- return __is_default_overflow_handler(event->orig_overflow_handler);
+ return is_default_overflow_handler(event);
}
#else
#define uses_default_overflow_handler(event) \
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f0f0f71213a1..24a718e7eb98 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9548,6 +9548,12 @@ static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *r
return true;
}
+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_overflow_handler(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs);
+#endif
+
/*
* Generic event overflow handling, sampling.
*/
@@ -9617,7 +9623,10 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(&event->pending_irq);
}
- READ_ONCE(event->overflow_handler)(event, data, regs);
+#ifdef CONFIG_BPF_SYSCALL
+ if (!(event->prog && !bpf_overflow_handler(event, data, regs)))
+#endif
+ READ_ONCE(event->overflow_handler)(event, data, regs);
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
@@ -10427,9 +10436,9 @@ static void perf_event_free_filter(struct perf_event *event)
}
#ifdef CONFIG_BPF_SYSCALL
-static void bpf_overflow_handler(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static int bpf_overflow_handler(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
struct bpf_perf_event_data_kern ctx = {
.data = data,
@@ -10450,10 +10459,8 @@ static void bpf_overflow_handler(struct perf_event *event,
rcu_read_unlock();
out:
__this_cpu_dec(bpf_prog_active);
- if (!ret)
- return;
- event->orig_overflow_handler(event, data, regs);
+ return ret;
}
static int perf_event_set_bpf_handler(struct perf_event *event,
@@ -10489,8 +10496,6 @@ static int perf_event_set_bpf_handler(struct perf_event *event,
event->prog = prog;
event->bpf_cookie = bpf_cookie;
- event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
- WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
return 0;
}
@@ -10501,7 +10506,6 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
if (!prog)
return;
- WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
event->prog = NULL;
bpf_prog_put(prog);
}
@@ -11975,13 +11979,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
- if (overflow_handler == bpf_overflow_handler) {
+ if (parent_event->prog) {
struct bpf_prog *prog = parent_event->prog;
bpf_prog_inc(prog);
event->prog = prog;
- event->orig_overflow_handler =
- parent_event->orig_overflow_handler;
}
#endif
}
--
2.34.1
* Kyle Huey <me@kylehuey.com> wrote:
> To ultimately allow bpf programs attached to perf events to completely
> suppress all of the effects of a perf event overflow (rather than just the
> sample output, as they do today), call bpf_overflow_handler() from
> __perf_event_overflow() directly rather than modifying struct perf_event's
> overflow_handler. Return the bpf program's return value from
> bpf_overflow_handler() so that __perf_event_overflow() knows how to
> proceed. Remove the now unnecessary orig_overflow_handler from struct
> perf_event.
>
> This patch is solely a refactoring and results in no behavior change.
>
> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
> Suggested-by: Namhyung Kim <namhyung@kernel.org>
> Acked-by: Song Liu <song@kernel.org>
> Acked-by: Jiri Olsa <jolsa@kernel.org>
> ---
> include/linux/perf_event.h | 6 +-----
> kernel/events/core.c | 28 +++++++++++++++-------------
> 2 files changed, 16 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index d2a15c0c6f8a..c7f54fd74d89 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -810,7 +810,6 @@ struct perf_event {
> perf_overflow_handler_t overflow_handler;
> void *overflow_handler_context;
> #ifdef CONFIG_BPF_SYSCALL
> - perf_overflow_handler_t orig_overflow_handler;
> struct bpf_prog *prog;
> u64 bpf_cookie;
> #endif
Could we reduce the #ifdeffery please?
On distros CONFIG_BPF_SYSCALL is almost always enabled, so it's not like
this truly saves anything on real systems.
I'd suggest making the perf_event::prog and perf_event::bpf_cookie fields
unconditional.
> +#ifdef CONFIG_BPF_SYSCALL
> +static int bpf_overflow_handler(struct perf_event *event,
> + struct perf_sample_data *data,
> + struct pt_regs *regs);
> +#endif
If the function definitions are misordered then first do a patch that moves
the function earlier in the file, instead of slapping a random prototype
into a random place.
> - READ_ONCE(event->overflow_handler)(event, data, regs);
> +#ifdef CONFIG_BPF_SYSCALL
> + if (!(event->prog && !bpf_overflow_handler(event, data, regs)))
> +#endif
> + READ_ONCE(event->overflow_handler)(event, data, regs);
This #ifdef would go away too - on !CONFIG_BPF_SYSCALL event->prog should
always be NULL.
Please keep the #ifdeffery reduction and function-moving patches separate
from these other changes.
Thanks,
Ingo
On Wed, Apr 10, 2024 at 12:32 AM Ingo Molnar <mingo@kernel.org> wrote:
>
>
> * Kyle Huey <me@kylehuey.com> wrote:
>
> > To ultimately allow bpf programs attached to perf events to completely
> > suppress all of the effects of a perf event overflow (rather than just the
> > sample output, as they do today), call bpf_overflow_handler() from
> > __perf_event_overflow() directly rather than modifying struct perf_event's
> > overflow_handler. Return the bpf program's return value from
> > bpf_overflow_handler() so that __perf_event_overflow() knows how to
> > proceed. Remove the now unnecessary orig_overflow_handler from struct
> > perf_event.
> >
> > This patch is solely a refactoring and results in no behavior change.
> >
> > Signed-off-by: Kyle Huey <khuey@kylehuey.com>
> > Suggested-by: Namhyung Kim <namhyung@kernel.org>
> > Acked-by: Song Liu <song@kernel.org>
> > Acked-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> > include/linux/perf_event.h | 6 +-----
> > kernel/events/core.c | 28 +++++++++++++++-------------
> > 2 files changed, 16 insertions(+), 18 deletions(-)
> >
> > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > index d2a15c0c6f8a..c7f54fd74d89 100644
> > --- a/include/linux/perf_event.h
> > +++ b/include/linux/perf_event.h
> > @@ -810,7 +810,6 @@ struct perf_event {
> > perf_overflow_handler_t overflow_handler;
> > void *overflow_handler_context;
> > #ifdef CONFIG_BPF_SYSCALL
> > - perf_overflow_handler_t orig_overflow_handler;
> > struct bpf_prog *prog;
> > u64 bpf_cookie;
> > #endif
>
> Could we reduce the #ifdeffery please?
Not easily.
> On distros CONFIG_BPF_SYSCALL is almost always enabled, so it's not like
> this truly saves anything on real systems.
>
> I'd suggest making the perf_event::prog and perf_event::bpf_cookie fields
> unconditional.
That's not sufficient. See below.
> > +#ifdef CONFIG_BPF_SYSCALL
> > +static int bpf_overflow_handler(struct perf_event *event,
> > + struct perf_sample_data *data,
> > + struct pt_regs *regs);
> > +#endif
>
> If the function definitions are misordered then first do a patch that moves
> the function earlier in the file, instead of slapping a random prototype
> into a random place.
Ok.
> > - READ_ONCE(event->overflow_handler)(event, data, regs);
> > +#ifdef CONFIG_BPF_SYSCALL
> > + if (!(event->prog && !bpf_overflow_handler(event, data, regs)))
> > +#endif
> > + READ_ONCE(event->overflow_handler)(event, data, regs);
>
> This #ifdef would go away too - on !CONFIG_BPF_SYSCALL event->prog should
> always be NULL.
bpf_overflow_handler() is also #ifdef CONFIG_BPF_SYSCALL. It uses
bpf_prog_active, so that would need to be moved out of the ifdef,
which would require moving the DEFINE_PER_CPU out of bpf/syscall.c ...
or I'd have to add a !CONFIG_BPF_SYSCALL definition of
bpf_overflow_handler() that only returns 1 and never actually gets
called because the condition short-circuits on event->prog. Neither
seems like it makes my patch or the code simpler, especially since
this weird ifdef-that-applies-only-to-the-condition goes away in Part
3 where I actually change the behavior.
It feels like the root of your objection is that CONFIG_BPF_SYSCALL
exists at all. I could remove it in a separate patch if there's
consensus about that.
> Please keep the #ifdeffery reduction and function-moving patches separate
> from these other changes.
>
> Thanks,
>
> Ingo
- Kyle
On Thu, Apr 11, 2024 at 8:11 AM Kyle Huey <me@kylehuey.com> wrote:
>
> On Wed, Apr 10, 2024 at 12:32 AM Ingo Molnar <mingo@kernel.org> wrote:
> >
> >
> > * Kyle Huey <me@kylehuey.com> wrote:
> >
> > > To ultimately allow bpf programs attached to perf events to completely
> > > suppress all of the effects of a perf event overflow (rather than just the
> > > sample output, as they do today), call bpf_overflow_handler() from
> > > __perf_event_overflow() directly rather than modifying struct perf_event's
> > > overflow_handler. Return the bpf program's return value from
> > > bpf_overflow_handler() so that __perf_event_overflow() knows how to
> > > proceed. Remove the now unnecessary orig_overflow_handler from struct
> > > perf_event.
> > >
> > > This patch is solely a refactoring and results in no behavior change.
> > >
> > > Signed-off-by: Kyle Huey <khuey@kylehuey.com>
> > > Suggested-by: Namhyung Kim <namhyung@kernel.org>
> > > Acked-by: Song Liu <song@kernel.org>
> > > Acked-by: Jiri Olsa <jolsa@kernel.org>
> > > ---
> > > include/linux/perf_event.h | 6 +-----
> > > kernel/events/core.c | 28 +++++++++++++++-------------
> > > 2 files changed, 16 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > > index d2a15c0c6f8a..c7f54fd74d89 100644
> > > --- a/include/linux/perf_event.h
> > > +++ b/include/linux/perf_event.h
> > > @@ -810,7 +810,6 @@ struct perf_event {
> > > perf_overflow_handler_t overflow_handler;
> > > void *overflow_handler_context;
> > > #ifdef CONFIG_BPF_SYSCALL
> > > - perf_overflow_handler_t orig_overflow_handler;
> > > struct bpf_prog *prog;
> > > u64 bpf_cookie;
> > > #endif
> >
> > Could we reduce the #ifdeffery please?
>
> Not easily.
>
> > On distros CONFIG_BPF_SYSCALL is almost always enabled, so it's not like
> > this truly saves anything on real systems.
> >
> > I'd suggest making the perf_event::prog and perf_event::bpf_cookie fields
> > unconditional.
>
> That's not sufficient. See below.
>
> > > +#ifdef CONFIG_BPF_SYSCALL
> > > +static int bpf_overflow_handler(struct perf_event *event,
> > > + struct perf_sample_data *data,
> > > + struct pt_regs *regs);
> > > +#endif
> >
> > If the function definitions are misordered then first do a patch that moves
> > the function earlier in the file, instead of slapping a random prototype
> > into a random place.
>
> Ok.
>
> > > - READ_ONCE(event->overflow_handler)(event, data, regs);
> > > +#ifdef CONFIG_BPF_SYSCALL
> > > + if (!(event->prog && !bpf_overflow_handler(event, data, regs)))
> > > +#endif
> > > + READ_ONCE(event->overflow_handler)(event, data, regs);
> >
> > This #ifdef would go away too - on !CONFIG_BPF_SYSCALL event->prog should
> > always be NULL.
>
> bpf_overflow_handler() is also #ifdef CONFIG_BPF_SYSCALL. It uses
> bpf_prog_active, so that would need to be moved out of the ifdef,
> which would require moving the DEFINE_PER_CPU out of bpf/syscall.c ...
> or I'd have to add a !CONFIG_BPF_SYSCALL definition of
> bpf_overflow_handler() that only returns 1 and never actually gets
> called because the condition short-circuits on event->prog. Neither
> seems like it makes my patch or the code simpler, especially since
> this weird ifdef-that-applies-only-to-the-condition goes away in Part
> 3 where I actually change the behavior.
After fiddling with this I think the stub definition of
bpf_overflow_handler() is fine. The other CONFIG_BPF_SYSCALL functions
in this file already have similar stubs. I'll send a new patch set.
- Kyle
> It feels like the root of your objection is that CONFIG_BPF_SYSCALL
> exists at all. I could remove it in a separate patch if there's
> consensus about that.
>
>
>
>
> > Please keep the #ifdeffery reduction and function-moving patches separate
> > from these other changes.
> >
> > Thanks,
> >
> > Ingo
>
> - Kyle
On Wed, Feb 14, 2024 at 9:40 AM Kyle Huey <me@kylehuey.com> wrote:
>
> To ultimately allow bpf programs attached to perf events to completely
> suppress all of the effects of a perf event overflow (rather than just the
> sample output, as they do today), call bpf_overflow_handler() from
> __perf_event_overflow() directly rather than modifying struct perf_event's
> overflow_handler. Return the bpf program's return value from
> bpf_overflow_handler() so that __perf_event_overflow() knows how to
> proceed. Remove the now unnecessary orig_overflow_handler from struct
> perf_event.
>
> This patch is solely a refactoring and results in no behavior change.
>
> Signed-off-by: Kyle Huey <khuey@kylehuey.com>
> Suggested-by: Namhyung Kim <namhyung@kernel.org>
> Acked-by: Song Liu <song@kernel.org>
> Acked-by: Jiri Olsa <jolsa@kernel.org>
> ---
> include/linux/perf_event.h | 6 +-----
> kernel/events/core.c | 28 +++++++++++++++-------------
> 2 files changed, 16 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index d2a15c0c6f8a..c7f54fd74d89 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -810,7 +810,6 @@ struct perf_event {
> perf_overflow_handler_t overflow_handler;
> void *overflow_handler_context;
> #ifdef CONFIG_BPF_SYSCALL
> - perf_overflow_handler_t orig_overflow_handler;
> struct bpf_prog *prog;
> u64 bpf_cookie;
> #endif
> @@ -1357,10 +1356,7 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
> #ifdef CONFIG_BPF_SYSCALL
> static inline bool uses_default_overflow_handler(struct perf_event *event)
> {
> - if (likely(is_default_overflow_handler(event)))
> - return true;
> -
> - return __is_default_overflow_handler(event->orig_overflow_handler);
> + return is_default_overflow_handler(event);
> }
> #else
> #define uses_default_overflow_handler(event) \
So in both cases uses_default_overflow_handler() is now just
is_default_overflow_handler(), right? Can we then clean all of this up
quite a bit?
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index f0f0f71213a1..24a718e7eb98 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -9548,6 +9548,12 @@ static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *r
> return true;
> }
>
> +#ifdef CONFIG_BPF_SYSCALL
> +static int bpf_overflow_handler(struct perf_event *event,
> + struct perf_sample_data *data,
> + struct pt_regs *regs);
> +#endif
> +
> /*
> * Generic event overflow handling, sampling.
> */
> @@ -9617,7 +9623,10 @@ static int __perf_event_overflow(struct perf_event *event,
> irq_work_queue(&event->pending_irq);
> }
>
> - READ_ONCE(event->overflow_handler)(event, data, regs);
> +#ifdef CONFIG_BPF_SYSCALL
> + if (!(event->prog && !bpf_overflow_handler(event, data, regs)))
> +#endif
> + READ_ONCE(event->overflow_handler)(event, data, regs);
This is quite hard to follow... And that CONFIG_BPF_SYSCALL check
breaking apart that if statement is not great. Maybe something like:
bool skip_def_handler = false;
#ifdef CONFIG_BPF_SYSCALL
if (event->prog)
skip_def_handler = bpf_overflow_handler(event, data, regs) == 0;
#endif
if (!skip_def_handler)
READ_ONCE(event->overflow_handler)(event, data, regs);
We can of course invert "skip" to be "run" and invert the conditions, if
that's easier to follow.
>
> if (*perf_event_fasync(event) && event->pending_kill) {
> event->pending_wakeup = 1;
> @@ -10427,9 +10436,9 @@ static void perf_event_free_filter(struct perf_event *event)
> }
>
> #ifdef CONFIG_BPF_SYSCALL
> -static void bpf_overflow_handler(struct perf_event *event,
> - struct perf_sample_data *data,
> - struct pt_regs *regs)
> +static int bpf_overflow_handler(struct perf_event *event,
> + struct perf_sample_data *data,
> + struct pt_regs *regs)
> {
> struct bpf_perf_event_data_kern ctx = {
> .data = data,
[...]
© 2016 - 2026 Red Hat, Inc.