Parse the off-cpu event using parse_event, as bpf-output.
no-inherit is should be set to 1, here's the reason:
We update the BPF perf_event map for direct off-cpu sample dumping (in
following patches), it executes as follows:
bpf_map_update_value()
bpf_fd_array_map_update_elem()
perf_event_fd_array_get_ptr()
perf_event_read_local()
In perf_event_read_local(), there is:
int perf_event_read_local(struct perf_event *event, u64 *value,
u64 *enabled, u64 *running)
{
...
/*
* It must not be an event with inherit set, we cannot read
* all child counters from atomic context.
*/
if (event->attr.inherit) {
ret = -EOPNOTSUPP;
goto out;
}
Which means no-inherit has to be true for updating the BPF perf_event
map.
Moreover, for bpf-output events, we primarily want a system-wide event
instead of a per-task event.
The reason is that in BPF's bpf_perf_event_output(), BPF uses the CPU
index to retrieve the perf_event file descriptor it outputs to.
Making a bpf-output event system-wide naturally satisfies this
requirement by mapping CPU appropriately.
Suggested-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Howard Chu <howardchu95@gmail.com>
---
tools/perf/util/bpf_off_cpu.c | 33 +++++++++++----------------------
1 file changed, 11 insertions(+), 22 deletions(-)
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index a590a8ac1f9d..558c5e5c2dc3 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -38,32 +38,21 @@ union off_cpu_data {
static int off_cpu_config(struct evlist *evlist)
{
+ char off_cpu_event[64];
struct evsel *evsel;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_BPF_OUTPUT,
- .size = sizeof(attr), /* to capture ABI version */
- };
- char *evname = strdup(OFFCPU_EVENT);
-
- if (evname == NULL)
- return -ENOMEM;
- evsel = evsel__new(&attr);
- if (!evsel) {
- free(evname);
- return -ENOMEM;
+ scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/no-inherit=1,name=%s/", OFFCPU_EVENT);
+ if (parse_event(evlist, off_cpu_event)) {
+ pr_err("Failed to open off-cpu event\n");
+ return -1;
}
- evsel->core.attr.freq = 1;
- evsel->core.attr.sample_period = 1;
- /* off-cpu analysis depends on stack trace */
- evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
-
- evlist__add(evlist, evsel);
-
- free(evsel->name);
- evsel->name = evname;
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_offcpu_event(evsel)) {
+ evsel->core.system_wide = true;
+ break;
+ }
+ }
return 0;
}
--
2.43.0
On Fri, Nov 8, 2024 at 12:41 PM Howard Chu <howardchu95@gmail.com> wrote: > > Parse the off-cpu event using parse_event, as bpf-output. > > no-inherit is should be set to 1, here's the reason: nit: s/is should be/should be/ > We update the BPF perf_event map for direct off-cpu sample dumping (in > following patches), it executes as follows: > > bpf_map_update_value() > bpf_fd_array_map_update_elem() > perf_event_fd_array_get_ptr() > perf_event_read_local() > > In perf_event_read_local(), there is: > > int perf_event_read_local(struct perf_event *event, u64 *value, > u64 *enabled, u64 *running) > { > ... > /* > * It must not be an event with inherit set, we cannot read > * all child counters from atomic context. > */ > if (event->attr.inherit) { > ret = -EOPNOTSUPP; > goto out; > } > > Which means no-inherit has to be true for updating the BPF perf_event > map. > > Moreover, for bpf-output events, we primarily want a system-wide event > instead of a per-task event. > > The reason is that in BPF's bpf_perf_event_output(), BPF uses the CPU > index to retrieve the perf_event file descriptor it outputs to. > > Making a bpf-output event system-wide naturally satisfies this > requirement by mapping CPU appropriately. > > Suggested-by: Namhyung Kim <namhyung@kernel.org> > Signed-off-by: Howard Chu <howardchu95@gmail.com> > --- > tools/perf/util/bpf_off_cpu.c | 33 +++++++++++---------------------- > 1 file changed, 11 insertions(+), 22 deletions(-) > > diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c > index a590a8ac1f9d..558c5e5c2dc3 100644 > --- a/tools/perf/util/bpf_off_cpu.c > +++ b/tools/perf/util/bpf_off_cpu.c > @@ -38,32 +38,21 @@ union off_cpu_data { > > static int off_cpu_config(struct evlist *evlist) > { > + char off_cpu_event[64]; > struct evsel *evsel; > - struct perf_event_attr attr = { > - .type = PERF_TYPE_SOFTWARE, > - .config = PERF_COUNT_SW_BPF_OUTPUT, > - .size = sizeof(attr), /* to capture ABI version */ > - }; > - char *evname = strdup(OFFCPU_EVENT); > - > - if (evname == NULL) > - return -ENOMEM; > > - evsel = evsel__new(&attr); > - if (!evsel) { > - free(evname); > - return -ENOMEM; > + scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/no-inherit=1,name=%s/", OFFCPU_EVENT); > + if (parse_event(evlist, off_cpu_event)) { > + pr_err("Failed to open off-cpu event\n"); > + return -1; Woot, love this! Much happier to see parse_events being used than hand crafted attributes. This will help us keep things synchronized via event parsing. Reviewed-by: Ian Rogers <irogers@google.com> Thanks, Ian > } > > - evsel->core.attr.freq = 1; > - evsel->core.attr.sample_period = 1; > - /* off-cpu analysis depends on stack trace */ > - evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; > - > - evlist__add(evlist, evsel); > - > - free(evsel->name); > - evsel->name = evname; > + evlist__for_each_entry(evlist, evsel) { > + if (evsel__is_offcpu_event(evsel)) { > + evsel->core.system_wide = true; > + break; > + } > + } > > return 0; > } > -- > 2.43.0 >
Hello, On Mon, Nov 11, 2024 at 9:45 AM Ian Rogers <irogers@google.com> wrote: > > On Fri, Nov 8, 2024 at 12:41 PM Howard Chu <howardchu95@gmail.com> wrote: > > > > Parse the off-cpu event using parse_event, as bpf-output. > > > > no-inherit is should be set to 1, here's the reason: > > nit: s/is should be/should be/ Interesting sed substitution format. Good catch! I will fix that in a next commit, thanks! > > > We update the BPF perf_event map for direct off-cpu sample dumping (in > > following patches), it executes as follows: > > > > bpf_map_update_value() > > bpf_fd_array_map_update_elem() > > perf_event_fd_array_get_ptr() > > perf_event_read_local() > > > > In perf_event_read_local(), there is: > > > > int perf_event_read_local(struct perf_event *event, u64 *value, > > u64 *enabled, u64 *running) > > { > > ... > > /* > > * It must not be an event with inherit set, we cannot read > > * all child counters from atomic context. > > */ > > if (event->attr.inherit) { > > ret = -EOPNOTSUPP; > > goto out; > > } > > > > Which means no-inherit has to be true for updating the BPF perf_event > > map. > > > > Moreover, for bpf-output events, we primarily want a system-wide event > > instead of a per-task event. > > > > The reason is that in BPF's bpf_perf_event_output(), BPF uses the CPU > > index to retrieve the perf_event file descriptor it outputs to. > > > > Making a bpf-output event system-wide naturally satisfies this > > requirement by mapping CPU appropriately. > > > > Suggested-by: Namhyung Kim <namhyung@kernel.org> > > Signed-off-by: Howard Chu <howardchu95@gmail.com> > > --- > > tools/perf/util/bpf_off_cpu.c | 33 +++++++++++---------------------- > > 1 file changed, 11 insertions(+), 22 deletions(-) > > > > diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c > > index a590a8ac1f9d..558c5e5c2dc3 100644 > > --- a/tools/perf/util/bpf_off_cpu.c > > +++ b/tools/perf/util/bpf_off_cpu.c > > @@ -38,32 +38,21 @@ union off_cpu_data { > > > > static int off_cpu_config(struct evlist *evlist) > > { > > + char off_cpu_event[64]; > > struct evsel *evsel; > > - struct perf_event_attr attr = { > > - .type = PERF_TYPE_SOFTWARE, > > - .config = PERF_COUNT_SW_BPF_OUTPUT, > > - .size = sizeof(attr), /* to capture ABI version */ > > - }; > > - char *evname = strdup(OFFCPU_EVENT); > > - > > - if (evname == NULL) > > - return -ENOMEM; > > > > - evsel = evsel__new(&attr); > > - if (!evsel) { > > - free(evname); > > - return -ENOMEM; > > + scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/no-inherit=1,name=%s/", OFFCPU_EVENT); > > + if (parse_event(evlist, off_cpu_event)) { > > + pr_err("Failed to open off-cpu event\n"); > > + return -1; > > Woot, love this! Much happier to see parse_events being used than hand > crafted attributes. This will help us keep things synchronized via > event parsing. > > Reviewed-by: Ian Rogers <irogers@google.com> > > Thanks, > Ian > > > } > > > > - evsel->core.attr.freq = 1; > > - evsel->core.attr.sample_period = 1; > > - /* off-cpu analysis depends on stack trace */ > > - evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; > > - > > - evlist__add(evlist, evsel); > > - > > - free(evsel->name); > > - evsel->name = evname; > > + evlist__for_each_entry(evlist, evsel) { > > + if (evsel__is_offcpu_event(evsel)) { > > + evsel->core.system_wide = true; > > + break; > > + } > > + } > > > > return 0; > > } > > -- > > 2.43.0 > > Thanks, Howard
© 2016 - 2024 Red Hat, Inc.