The perf_counts_values should be increased to read the new lost data.
Also adjust values after read according the read format.
This supports PERF_FORMAT_GROUP which has a different data format but
it's only available for leader events. Currently it doesn't have an API
to read sibling (member) events in the group. But users may read the
sibling event directly.
Also reading from mmap would be disabled when the read format has ID or
LOST bit as it's not exposed via mmap.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
tools/lib/perf/evsel.c | 72 +++++++++++++++++++++++++++++
tools/lib/perf/include/perf/event.h | 3 +-
tools/lib/perf/include/perf/evsel.h | 4 +-
3 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 952f3520d5c2..fc23670231cb 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -305,6 +305,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
if (read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
+ if (read_format & PERF_FORMAT_LOST)
+ entry += sizeof(u64);
+
if (read_format & PERF_FORMAT_GROUP) {
nr = evsel->nr_members;
size += sizeof(u64);
@@ -314,24 +317,93 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
return size;
}
+/* This only reads values for the leader */
+static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx,
+ int thread, struct perf_counts_values *count)
+{
+ size_t size = perf_evsel__read_size(evsel);
+ int *fd = FD(evsel, cpu_map_idx, thread);
+ u64 read_format = evsel->attr.read_format;
+ u64 *data;
+ int idx = 1;
+
+ if (fd == NULL || *fd < 0)
+ return -EINVAL;
+
+ data = calloc(1, size);
+ if (data == NULL)
+ return -ENOMEM;
+
+ if (readn(*fd, data, size) <= 0) {
+ free(data);
+ return -errno;
+ }
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ count->ena = data[idx++];
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ count->run = data[idx++];
+
+ /* value is always available */
+ count->val = data[idx++];
+ if (read_format & PERF_FORMAT_ID)
+ count->id = data[idx++];
+ if (read_format & PERF_FORMAT_LOST)
+ count->lost = data[idx++];
+
+ free(data);
+ return 0;
+}
+
+/*
+ * The perf read format is very flexible. It needs to set the proper
+ * values according to the read format.
+ */
+static void perf_evsel__adjust_values(struct perf_evsel *evsel,
+ struct perf_counts_values *count)
+{
+ u64 read_format = evsel->attr.read_format;
+
+ if (!(read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)) {
+ memmove(&count->values[2], &count->values[1], 24);
+ count->ena = 0;
+ }
+
+ if (!(read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)) {
+ memmove(&count->values[3], &count->values[2], 16);
+ count->run = 0;
+ }
+
+ if (!(read_format & PERF_FORMAT_ID)) {
+ memmove(&count->values[4], &count->values[3], 8);
+ count->id = 0;
+ }
+}
+
int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
size_t size = perf_evsel__read_size(evsel);
int *fd = FD(evsel, cpu_map_idx, thread);
+ u64 read_format = evsel->attr.read_format;
memset(count, 0, sizeof(*count));
if (fd == NULL || *fd < 0)
return -EINVAL;
+ if (read_format & PERF_FORMAT_GROUP)
+ return perf_evsel__read_group(evsel, cpu_map_idx, thread, count);
+
if (MMAP(evsel, cpu_map_idx, thread) &&
+ !(read_format & (PERF_FORMAT_ID | PERF_FORMAT_LOST)) &&
!perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count))
return 0;
if (readn(*fd, count->values, size) <= 0)
return -errno;
+ perf_evsel__adjust_values(evsel, count);
return 0;
}
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 556bb06798f2..38dd35cbca71 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -76,7 +76,7 @@ struct perf_record_lost_samples {
};
/*
- * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
+ * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST
*/
struct perf_record_read {
struct perf_event_header header;
@@ -85,6 +85,7 @@ struct perf_record_read {
__u64 time_enabled;
__u64 time_running;
__u64 id;
+ __u64 lost;
};
struct perf_record_throttle {
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
index 699c0ed97d34..6f92204075c2 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -18,8 +18,10 @@ struct perf_counts_values {
uint64_t val;
uint64_t ena;
uint64_t run;
+ uint64_t id;
+ uint64_t lost;
};
- uint64_t values[3];
+ uint64_t values[5];
};
};
--
2.37.1.595.g718a3a8f04-goog
On Mon, Aug 15, 2022 at 12:01:04PM -0700, Namhyung Kim wrote:
> The perf_counts_values should be increased to read the new lost data.
> Also adjust values after read according the read format.
>
> This supports PERF_FORMAT_GROUP which has a different data format but
> it's only available for leader events. Currently it doesn't have an API
> to read sibling (member) events in the group. But users may read the
> sibling event directly.
>
> Also reading from mmap would be disabled when the read format has ID or
> LOST bit as it's not exposed via mmap.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> tools/lib/perf/evsel.c | 72 +++++++++++++++++++++++++++++
> tools/lib/perf/include/perf/event.h | 3 +-
> tools/lib/perf/include/perf/evsel.h | 4 +-
> 3 files changed, 77 insertions(+), 2 deletions(-)
>
> diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
> index 952f3520d5c2..fc23670231cb 100644
> --- a/tools/lib/perf/evsel.c
> +++ b/tools/lib/perf/evsel.c
> @@ -305,6 +305,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
> if (read_format & PERF_FORMAT_ID)
> entry += sizeof(u64);
>
> + if (read_format & PERF_FORMAT_LOST)
> + entry += sizeof(u64);
> +
> if (read_format & PERF_FORMAT_GROUP) {
> nr = evsel->nr_members;
> size += sizeof(u64);
> @@ -314,24 +317,93 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
> return size;
> }
>
> +/* This only reads values for the leader */
> +static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx,
> + int thread, struct perf_counts_values *count)
> +{
> + size_t size = perf_evsel__read_size(evsel);
> + int *fd = FD(evsel, cpu_map_idx, thread);
> + u64 read_format = evsel->attr.read_format;
> + u64 *data;
> + int idx = 1;
> +
> + if (fd == NULL || *fd < 0)
> + return -EINVAL;
> +
> + data = calloc(1, size);
> + if (data == NULL)
> + return -ENOMEM;
> +
> + if (readn(*fd, data, size) <= 0) {
> + free(data);
> + return -errno;
> + }
could you please put in here some comment that this is intentionaly
reading only the leader or better yet rename the function? I was lost
before I got to read the changelog ;-)
> +
> + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> + count->ena = data[idx++];
> + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> + count->run = data[idx++];
> +
> + /* value is always available */
> + count->val = data[idx++];
> + if (read_format & PERF_FORMAT_ID)
> + count->id = data[idx++];
> + if (read_format & PERF_FORMAT_LOST)
> + count->lost = data[idx++];
> +
> + free(data);
> + return 0;
> +}
> +
> +/*
> + * The perf read format is very flexible. It needs to set the proper
> + * values according to the read format.
> + */
> +static void perf_evsel__adjust_values(struct perf_evsel *evsel,
> + struct perf_counts_values *count)
> +{
> + u64 read_format = evsel->attr.read_format;
> +
> + if (!(read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)) {
> + memmove(&count->values[2], &count->values[1], 24);
> + count->ena = 0;
> + }
> +
> + if (!(read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)) {
> + memmove(&count->values[3], &count->values[2], 16);
> + count->run = 0;
> + }
> +
> + if (!(read_format & PERF_FORMAT_ID)) {
> + memmove(&count->values[4], &count->values[3], 8);
> + count->id = 0;
> + }
> +}
could we do this the same way we read group counters.. like make read
into local buffer and initialize perf_counts_values values based on
format, something like:
readn(fd, data ...
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
count->ena = data[idx++];
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
count->run = data[idx++];
/* value is always available */
count->val = data[idx++];
if (read_format & PERF_FORMAT_ID)
count->id = data[idx++];
if (read_format & PERF_FORMAT_LOST)
count->lost = data[idx++];
and perhaps we should cancel that perf_counts_values's union and keep
only 'val/ena/run...' fields?
jirka
Hi Jiri,
On Tue, Aug 16, 2022 at 6:19 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Mon, Aug 15, 2022 at 12:01:04PM -0700, Namhyung Kim wrote:
> > The perf_counts_values should be increased to read the new lost data.
> > Also adjust values after read according the read format.
> >
> > This supports PERF_FORMAT_GROUP which has a different data format but
> > it's only available for leader events. Currently it doesn't have an API
> > to read sibling (member) events in the group. But users may read the
> > sibling event directly.
> >
> > Also reading from mmap would be disabled when the read format has ID or
> > LOST bit as it's not exposed via mmap.
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > tools/lib/perf/evsel.c | 72 +++++++++++++++++++++++++++++
> > tools/lib/perf/include/perf/event.h | 3 +-
> > tools/lib/perf/include/perf/evsel.h | 4 +-
> > 3 files changed, 77 insertions(+), 2 deletions(-)
> >
> > diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
> > index 952f3520d5c2..fc23670231cb 100644
> > --- a/tools/lib/perf/evsel.c
> > +++ b/tools/lib/perf/evsel.c
> > @@ -305,6 +305,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
> > if (read_format & PERF_FORMAT_ID)
> > entry += sizeof(u64);
> >
> > + if (read_format & PERF_FORMAT_LOST)
> > + entry += sizeof(u64);
> > +
> > if (read_format & PERF_FORMAT_GROUP) {
> > nr = evsel->nr_members;
> > size += sizeof(u64);
> > @@ -314,24 +317,93 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
> > return size;
> > }
> >
> > +/* This only reads values for the leader */
> > +static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx,
> > + int thread, struct perf_counts_values *count)
> > +{
> > + size_t size = perf_evsel__read_size(evsel);
> > + int *fd = FD(evsel, cpu_map_idx, thread);
> > + u64 read_format = evsel->attr.read_format;
> > + u64 *data;
> > + int idx = 1;
> > +
> > + if (fd == NULL || *fd < 0)
> > + return -EINVAL;
> > +
> > + data = calloc(1, size);
> > + if (data == NULL)
> > + return -ENOMEM;
> > +
> > + if (readn(*fd, data, size) <= 0) {
> > + free(data);
> > + return -errno;
> > + }
>
> could you please put in here some comment that this is intentionaly
> reading only the leader or better yet rename the function? I was lost
> before I got to read the changelog ;-)
Sure, actually it has a comment above the function signature.
Maybe we can add an API to read whole group counters
but it could be a follow up.
>
> > +
> > + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> > + count->ena = data[idx++];
> > + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> > + count->run = data[idx++];
> > +
> > + /* value is always available */
> > + count->val = data[idx++];
> > + if (read_format & PERF_FORMAT_ID)
> > + count->id = data[idx++];
> > + if (read_format & PERF_FORMAT_LOST)
> > + count->lost = data[idx++];
> > +
> > + free(data);
> > + return 0;
> > +}
> > +
> > +/*
> > + * The perf read format is very flexible. It needs to set the proper
> > + * values according to the read format.
> > + */
> > +static void perf_evsel__adjust_values(struct perf_evsel *evsel,
> > + struct perf_counts_values *count)
> > +{
> > + u64 read_format = evsel->attr.read_format;
> > +
> > + if (!(read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)) {
> > + memmove(&count->values[2], &count->values[1], 24);
> > + count->ena = 0;
> > + }
> > +
> > + if (!(read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)) {
> > + memmove(&count->values[3], &count->values[2], 16);
> > + count->run = 0;
> > + }
> > +
> > + if (!(read_format & PERF_FORMAT_ID)) {
> > + memmove(&count->values[4], &count->values[3], 8);
> > + count->id = 0;
> > + }
> > +}
>
>
> could we do this the same way we read group counters.. like make read
> into local buffer and initialize perf_counts_values values based on
> format, something like:
>
> readn(fd, data ...
>
> if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> count->ena = data[idx++];
> if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> count->run = data[idx++];
>
> /* value is always available */
> count->val = data[idx++];
> if (read_format & PERF_FORMAT_ID)
> count->id = data[idx++];
> if (read_format & PERF_FORMAT_LOST)
> count->lost = data[idx++];
Sure.
>
>
> and perhaps we should cancel that perf_counts_values's union and keep
> only 'val/ena/run...' fields?
I'd like to have them and use it for reading the lost count soon.
Thanks,
Namhyung
Em Tue, Aug 16, 2022 at 03:19:20PM +0200, Jiri Olsa escreveu:
> On Mon, Aug 15, 2022 at 12:01:04PM -0700, Namhyung Kim wrote:
> > The perf_counts_values should be increased to read the new lost data.
> > Also adjust values after read according the read format.
> >
> > This supports PERF_FORMAT_GROUP which has a different data format but
> > it's only available for leader events. Currently it doesn't have an API
> > to read sibling (member) events in the group. But users may read the
> > sibling event directly.
> >
> > Also reading from mmap would be disabled when the read format has ID or
> > LOST bit as it's not exposed via mmap.
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> > tools/lib/perf/evsel.c | 72 +++++++++++++++++++++++++++++
> > tools/lib/perf/include/perf/event.h | 3 +-
> > tools/lib/perf/include/perf/evsel.h | 4 +-
> > 3 files changed, 77 insertions(+), 2 deletions(-)
> >
> > diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
> > index 952f3520d5c2..fc23670231cb 100644
> > --- a/tools/lib/perf/evsel.c
> > +++ b/tools/lib/perf/evsel.c
> > @@ -305,6 +305,9 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
> > if (read_format & PERF_FORMAT_ID)
> > entry += sizeof(u64);
> >
> > + if (read_format & PERF_FORMAT_LOST)
> > + entry += sizeof(u64);
> > +
> > if (read_format & PERF_FORMAT_GROUP) {
> > nr = evsel->nr_members;
> > size += sizeof(u64);
> > @@ -314,24 +317,93 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
> > return size;
> > }
> >
> > +/* This only reads values for the leader */
> > +static int perf_evsel__read_group(struct perf_evsel *evsel, int cpu_map_idx,
> > + int thread, struct perf_counts_values *count)
> > +{
> > + size_t size = perf_evsel__read_size(evsel);
> > + int *fd = FD(evsel, cpu_map_idx, thread);
> > + u64 read_format = evsel->attr.read_format;
> > + u64 *data;
> > + int idx = 1;
> > +
> > + if (fd == NULL || *fd < 0)
> > + return -EINVAL;
> > +
> > + data = calloc(1, size);
> > + if (data == NULL)
> > + return -ENOMEM;
> > +
> > + if (readn(*fd, data, size) <= 0) {
> > + free(data);
> > + return -errno;
> > + }
>
> could you please put in here some comment that this is intentionaly
> reading only the leader or better yet rename the function? I was lost
> before I got to read the changelog ;-)
>
> > +
> > + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> > + count->ena = data[idx++];
> > + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> > + count->run = data[idx++];
> > +
> > + /* value is always available */
> > + count->val = data[idx++];
> > + if (read_format & PERF_FORMAT_ID)
> > + count->id = data[idx++];
> > + if (read_format & PERF_FORMAT_LOST)
> > + count->lost = data[idx++];
> > +
> > + free(data);
> > + return 0;
> > +}
> > +
> > +/*
> > + * The perf read format is very flexible. It needs to set the proper
> > + * values according to the read format.
> > + */
> > +static void perf_evsel__adjust_values(struct perf_evsel *evsel,
> > + struct perf_counts_values *count)
> > +{
> > + u64 read_format = evsel->attr.read_format;
> > +
> > + if (!(read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)) {
> > + memmove(&count->values[2], &count->values[1], 24);
> > + count->ena = 0;
> > + }
> > +
> > + if (!(read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)) {
> > + memmove(&count->values[3], &count->values[2], 16);
> > + count->run = 0;
> > + }
> > +
> > + if (!(read_format & PERF_FORMAT_ID)) {
> > + memmove(&count->values[4], &count->values[3], 8);
> > + count->id = 0;
> > + }
> > +}
>
>
> could we do this the same way we read group counters.. like make read
> into local buffer and initialize perf_counts_values values based on
> format, something like:
>
> readn(fd, data ...
>
> if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
> count->ena = data[idx++];
> if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> count->run = data[idx++];
>
> /* value is always available */
> count->val = data[idx++];
> if (read_format & PERF_FORMAT_ID)
> count->id = data[idx++];
> if (read_format & PERF_FORMAT_LOST)
> count->lost = data[idx++];
I see this now, so I'll wait for Namhyung to react to your comments,
while keeping it in my local tree just to have it build tested.
- Arnaldo
>
> and perhaps we should cancel that perf_counts_values's union and keep
> only 'val/ena/run...' fields?
>
> jirka
--
- Arnaldo
© 2016 - 2026 Red Hat, Inc.