Dump the remaining samples, as if it is dumping a direct sample.
Put the stack trace, tid, off-cpu time and cgroup id into the raw_data
section, just like a direct off-cpu sample coming from BPF's
bpf_perf_event_output().
This ensures that evsel__parse_sample() correctly parses both direct
samples and accumulated samples.
Suggested-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Howard Chu <howardchu95@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241108204137.2444151-10-howardchu95@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/util/bpf_off_cpu.c | 59 +++++++++++++++++++++--------------
1 file changed, 35 insertions(+), 24 deletions(-)
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index 2378fb5a893f..d6f015362e85 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -37,6 +37,8 @@ union off_cpu_data {
u64 array[1024 / sizeof(u64)];
};
+u64 off_cpu_raw[MAX_STACKS + 5];
+
static int off_cpu_config(struct evlist *evlist)
{
char off_cpu_event[64];
@@ -309,6 +311,7 @@ int off_cpu_write(struct perf_session *session)
{
int bytes = 0, size;
int fd, stack;
+ u32 raw_size;
u64 sample_type, val, sid = 0;
struct evsel *evsel;
struct perf_data_file *file = &session->data->file;
@@ -348,46 +351,54 @@ int off_cpu_write(struct perf_session *session)
while (!bpf_map_get_next_key(fd, &prev, &key)) {
int n = 1; /* start from perf_event_header */
- int ip_pos = -1;
bpf_map_lookup_elem(fd, &key, &val);
+ /* zero-fill some of the fields, will be overwritten by raw_data when parsing */
if (sample_type & PERF_SAMPLE_IDENTIFIER)
data.array[n++] = sid;
- if (sample_type & PERF_SAMPLE_IP) {
- ip_pos = n;
+ if (sample_type & PERF_SAMPLE_IP)
data.array[n++] = 0; /* will be updated */
- }
if (sample_type & PERF_SAMPLE_TID)
- data.array[n++] = (u64)key.pid << 32 | key.tgid;
+ data.array[n++] = 0;
if (sample_type & PERF_SAMPLE_TIME)
data.array[n++] = tstamp;
- if (sample_type & PERF_SAMPLE_ID)
- data.array[n++] = sid;
if (sample_type & PERF_SAMPLE_CPU)
data.array[n++] = 0;
if (sample_type & PERF_SAMPLE_PERIOD)
- data.array[n++] = val;
- if (sample_type & PERF_SAMPLE_CALLCHAIN) {
- int len = 0;
-
- /* data.array[n] is callchain->nr (updated later) */
- data.array[n + 1] = PERF_CONTEXT_USER;
- data.array[n + 2] = 0;
-
- bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
- while (data.array[n + 2 + len])
+ data.array[n++] = 0;
+ if (sample_type & PERF_SAMPLE_RAW) {
+ /*
+ * [ size ][ data ]
+ * [ data ]
+ * [ data ]
+ * [ data ]
+ * [ data ][ empty]
+ */
+ int len = 0, i = 0;
+ void *raw_data = (void *)data.array + n * sizeof(u64);
+
+ off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid;
+ off_cpu_raw[i++] = val;
+
+ /* off_cpu_raw[i] is callchain->nr (updated later) */
+ off_cpu_raw[i + 1] = PERF_CONTEXT_USER;
+ off_cpu_raw[i + 2] = 0;
+
+ bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]);
+ while (off_cpu_raw[i + 2 + len])
len++;
- /* update length of callchain */
- data.array[n] = len + 1;
+ off_cpu_raw[i] = len + 1;
+ i += len + 2;
+
+ off_cpu_raw[i++] = key.cgroup_id;
- /* update sample ip with the first callchain entry */
- if (ip_pos >= 0)
- data.array[ip_pos] = data.array[n + 2];
+ raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */
+ memcpy(raw_data, &raw_size, sizeof(raw_size));
+ memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64));
- /* calculate sample callchain data array length */
- n += len + 2;
+ n += i + 1;
}
if (sample_type & PERF_SAMPLE_CGROUP)
data.array[n++] = key.cgroup_id;
--
2.43.0