Parse the off-cpu event using parse_event(), as bpf-output.
no-inherit should be set to 1, here's the reason:
We update the BPF perf_event map for direct off-cpu sample dumping (in
following patches), it executes as follows:
bpf_map_update_value()
bpf_fd_array_map_update_elem()
perf_event_fd_array_get_ptr()
perf_event_read_local()
In perf_event_read_local(), there is:
int perf_event_read_local(struct perf_event *event, u64 *value,
u64 *enabled, u64 *running)
{
...
/*
* It must not be an event with inherit set, we cannot read
* all child counters from atomic context.
*/
if (event->attr.inherit) {
ret = -EOPNOTSUPP;
goto out;
}
Which means no-inherit has to be true for updating the BPF perf_event
map.
Moreover, for bpf-output events, we primarily want a system-wide event
instead of a per-task event.
The reason is that in BPF's bpf_perf_event_output(), BPF uses the CPU
index to retrieve the perf_event file descriptor it outputs to.
Making a bpf-output event system-wide naturally satisfies this
requirement by mapping CPU appropriately.
Suggested-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Howard Chu <howardchu95@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241108204137.2444151-4-howardchu95@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/util/bpf_off_cpu.c | 33 +++++++++++----------------------
1 file changed, 11 insertions(+), 22 deletions(-)
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index a590a8ac1f9d..558c5e5c2dc3 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -38,32 +38,21 @@ union off_cpu_data {
static int off_cpu_config(struct evlist *evlist)
{
+ char off_cpu_event[64];
struct evsel *evsel;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_BPF_OUTPUT,
- .size = sizeof(attr), /* to capture ABI version */
- };
- char *evname = strdup(OFFCPU_EVENT);
-
- if (evname == NULL)
- return -ENOMEM;
- evsel = evsel__new(&attr);
- if (!evsel) {
- free(evname);
- return -ENOMEM;
+ scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/no-inherit=1,name=%s/", OFFCPU_EVENT);
+ if (parse_event(evlist, off_cpu_event)) {
+ pr_err("Failed to open off-cpu event\n");
+ return -1;
}
- evsel->core.attr.freq = 1;
- evsel->core.attr.sample_period = 1;
- /* off-cpu analysis depends on stack trace */
- evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
-
- evlist__add(evlist, evsel);
-
- free(evsel->name);
- evsel->name = evname;
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_offcpu_event(evsel)) {
+ evsel->core.system_wide = true;
+ break;
+ }
+ }
return 0;
}
--
2.43.0
On Tue, Nov 12, 2024 at 04:28:11PM -0800, Howard Chu wrote: > Parse the off-cpu event using parse_event(), as bpf-output. > > no-inherit should be set to 1, here's the reason: > > We update the BPF perf_event map for direct off-cpu sample dumping (in > following patches), it executes as follows: > > bpf_map_update_value() > bpf_fd_array_map_update_elem() > perf_event_fd_array_get_ptr() > perf_event_read_local() > > In perf_event_read_local(), there is: > > int perf_event_read_local(struct perf_event *event, u64 *value, > u64 *enabled, u64 *running) > { > ... > /* > * It must not be an event with inherit set, we cannot read > * all child counters from atomic context. > */ > if (event->attr.inherit) { > ret = -EOPNOTSUPP; > goto out; > } > > Which means no-inherit has to be true for updating the BPF perf_event > map. > > Moreover, for bpf-output events, we primarily want a system-wide event > instead of a per-task event. > > The reason is that in BPF's bpf_perf_event_output(), BPF uses the CPU > index to retrieve the perf_event file descriptor it outputs to. > > Making a bpf-output event system-wide naturally satisfies this > requirement by mapping CPU appropriately. I'm afraid the inherit attribute would be updated later: __cmd_record() evlist__config() evsel__config() You can add a logic to check the config term when setting the inherit value. Thanks, Namhyung > > Suggested-by: Namhyung Kim <namhyung@kernel.org> > Reviewed-by: Ian Rogers <irogers@google.com> > Signed-off-by: Howard Chu <howardchu95@gmail.com> > Cc: Adrian Hunter <adrian.hunter@intel.com> > Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: James Clark <james.clark@linaro.org> > Cc: Jiri Olsa <jolsa@kernel.org> > Cc: Kan Liang <kan.liang@linux.intel.com> > Cc: Mark Rutland <mark.rutland@arm.com> > Cc: Peter Zijlstra <peterz@infradead.org> > Link: https://lore.kernel.org/r/20241108204137.2444151-4-howardchu95@gmail.com > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> > --- > tools/perf/util/bpf_off_cpu.c | 33 +++++++++++---------------------- > 1 file changed, 11 insertions(+), 22 deletions(-) > > diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c > index a590a8ac1f9d..558c5e5c2dc3 100644 > --- a/tools/perf/util/bpf_off_cpu.c > +++ b/tools/perf/util/bpf_off_cpu.c > @@ -38,32 +38,21 @@ union off_cpu_data { > > static int off_cpu_config(struct evlist *evlist) > { > + char off_cpu_event[64]; > struct evsel *evsel; > - struct perf_event_attr attr = { > - .type = PERF_TYPE_SOFTWARE, > - .config = PERF_COUNT_SW_BPF_OUTPUT, > - .size = sizeof(attr), /* to capture ABI version */ > - }; > - char *evname = strdup(OFFCPU_EVENT); > - > - if (evname == NULL) > - return -ENOMEM; > > - evsel = evsel__new(&attr); > - if (!evsel) { > - free(evname); > - return -ENOMEM; > + scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/no-inherit=1,name=%s/", OFFCPU_EVENT); > + if (parse_event(evlist, off_cpu_event)) { > + pr_err("Failed to open off-cpu event\n"); > + return -1; > } > > - evsel->core.attr.freq = 1; > - evsel->core.attr.sample_period = 1; > - /* off-cpu analysis depends on stack trace */ > - evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; > - > - evlist__add(evlist, evsel); > - > - free(evsel->name); > - evsel->name = evname; > + evlist__for_each_entry(evlist, evsel) { > + if (evsel__is_offcpu_event(evsel)) { > + evsel->core.system_wide = true; > + break; > + } > + } > > return 0; > } > -- > 2.43.0 >
Hello, On Mon, Nov 18, 2024 at 1:17 PM Namhyung Kim <namhyung@kernel.org> wrote: > > On Tue, Nov 12, 2024 at 04:28:11PM -0800, Howard Chu wrote: > > Parse the off-cpu event using parse_event(), as bpf-output. > > > > no-inherit should be set to 1, here's the reason: > > > > We update the BPF perf_event map for direct off-cpu sample dumping (in > > following patches), it executes as follows: > > > > bpf_map_update_value() > > bpf_fd_array_map_update_elem() > > perf_event_fd_array_get_ptr() > > perf_event_read_local() > > > > In perf_event_read_local(), there is: > > > > int perf_event_read_local(struct perf_event *event, u64 *value, > > u64 *enabled, u64 *running) > > { > > ... > > /* > > * It must not be an event with inherit set, we cannot read > > * all child counters from atomic context. > > */ > > if (event->attr.inherit) { > > ret = -EOPNOTSUPP; > > goto out; > > } > > > > Which means no-inherit has to be true for updating the BPF perf_event > > map. > > > > Moreover, for bpf-output events, we primarily want a system-wide event > > instead of a per-task event. > > > > The reason is that in BPF's bpf_perf_event_output(), BPF uses the CPU > > index to retrieve the perf_event file descriptor it outputs to. > > > > Making a bpf-output event system-wide naturally satisfies this > > requirement by mapping CPU appropriately. > > I'm afraid the inherit attribute would be updated later: > > __cmd_record() > evlist__config() > evsel__config() > > You can add a logic to check the config term when setting the inherit > value. Sure. Thanks, Howard > > Thanks, > Namhyung > > > > > Suggested-by: Namhyung Kim <namhyung@kernel.org> > > Reviewed-by: Ian Rogers <irogers@google.com> > > Signed-off-by: Howard Chu <howardchu95@gmail.com> > > Cc: Adrian Hunter <adrian.hunter@intel.com> > > Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> > > Cc: Ingo Molnar <mingo@redhat.com> > > Cc: James Clark <james.clark@linaro.org> > > Cc: Jiri Olsa <jolsa@kernel.org> > > Cc: Kan Liang <kan.liang@linux.intel.com> > > Cc: Mark Rutland <mark.rutland@arm.com> > > Cc: Peter Zijlstra <peterz@infradead.org> > > Link: https://lore.kernel.org/r/20241108204137.2444151-4-howardchu95@gmail.com > > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> > > --- > > tools/perf/util/bpf_off_cpu.c | 33 +++++++++++---------------------- > > 1 file changed, 11 insertions(+), 22 deletions(-) > > > > diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c > > index a590a8ac1f9d..558c5e5c2dc3 100644 > > --- a/tools/perf/util/bpf_off_cpu.c > > +++ b/tools/perf/util/bpf_off_cpu.c > > @@ -38,32 +38,21 @@ union off_cpu_data { > > > > static int off_cpu_config(struct evlist *evlist) > > { > > + char off_cpu_event[64]; > > struct evsel *evsel; > > - struct perf_event_attr attr = { > > - .type = PERF_TYPE_SOFTWARE, > > - .config = PERF_COUNT_SW_BPF_OUTPUT, > > - .size = sizeof(attr), /* to capture ABI version */ > > - }; > > - char *evname = strdup(OFFCPU_EVENT); > > - > > - if (evname == NULL) > > - return -ENOMEM; > > > > - evsel = evsel__new(&attr); > > - if (!evsel) { > > - free(evname); > > - return -ENOMEM; > > + scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/no-inherit=1,name=%s/", OFFCPU_EVENT); > > + if (parse_event(evlist, off_cpu_event)) { > > + pr_err("Failed to open off-cpu event\n"); > > + return -1; > > } > > > > - evsel->core.attr.freq = 1; > > - evsel->core.attr.sample_period = 1; > > - /* off-cpu analysis depends on stack trace */ > > - evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; > > - > > - evlist__add(evlist, evsel); > > - > > - free(evsel->name); > > - evsel->name = evname; > > + evlist__for_each_entry(evlist, evsel) { > > + if (evsel__is_offcpu_event(evsel)) { > > + evsel->core.system_wide = true; > > + break; > > + } > > + } > > > > return 0; > > } > > -- > > 2.43.0 > >
© 2016 - 2024 Red Hat, Inc.