[PATCH 3/5] perf record: Read and inject LOST_SAMPLES events

Namhyung Kim posted 5 patches 3 years, 7 months ago
[PATCH 3/5] perf record: Read and inject LOST_SAMPLES events
Posted by Namhyung Kim 3 years, 7 months ago
When samples are lost, perf record can read the lost count reported via
PERF_FORMAT_LOST, convert it into a PERF_RECORD_LOST_SAMPLES event, and write
that event to the data file at the end.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-record.c | 60 +++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bce8c941d558..cb9881543a07 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -10,6 +10,7 @@
 
 #include "util/build-id.h"
 #include <subcmd/parse-options.h>
+#include <internal/xyarray.h>
 #include "util/parse-events.h"
 #include "util/config.h"
 
@@ -1852,6 +1853,64 @@ record__switch_output(struct record *rec, bool at_exit)
 	return fd;
 }
 
+static void __record__read_lost_samples(struct record *rec, struct evsel *evsel,
+					struct perf_record_lost_samples *lost,
+					int size, int cpu_idx, int thread_idx)
+{
+	struct perf_counts_values count;
+	struct perf_sample_id *sid;
+	struct perf_sample sample = {};
+
+	if (perf_evsel__read(&evsel->core, cpu_idx, thread_idx, &count) < 0) {
+		pr_err("read LOST count failed\n");
+		return;
+	}
+
+	if (count.lost == 0)
+		return;
+
+	lost->lost = count.lost;
+	if (evsel->core.ids) {
+		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
+		sample.id = sid->id;
+	}
+
+	perf_event__synthesize_id_sample((void *)(lost + 1),
+					 evsel->core.attr.sample_type, &sample);
+	record__write(rec, NULL, lost, size);
+}
+
+static void record__read_lost_samples(struct record *rec)
+{
+	struct perf_session *session = rec->session;
+	struct machine *machine = &session->machines.host;
+	struct perf_record_lost_samples *lost;
+	struct evsel *evsel;
+	int size = sizeof(*lost) + machine->id_hdr_size;
+
+	lost = zalloc(size);
+	lost->header.type = PERF_RECORD_LOST_SAMPLES;
+	lost->header.size = size;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		struct xyarray *xy = evsel->core.sample_id;
+
+		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
+		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
+			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
+			continue;
+		}
+
+		for (int x = 0; x < xyarray__max_x(xy); x++) {
+			for (int y = 0; y < xyarray__max_y(xy); y++) {
+				__record__read_lost_samples(rec, evsel, lost,
+							    size, x, y);
+			}
+		}
+	}
+
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -2710,6 +2769,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	if (rec->off_cpu)
 		rec->bytes_written += off_cpu_write(rec->session);
 
+	record__read_lost_samples(rec);
 	record__synthesize(rec, true);
 	/* this will be recalculated during process_buildids() */
 	rec->samples = 0;
-- 
2.37.2.789.g6183377224-goog
Re: [PATCH 3/5] perf record: Read and inject LOST_SAMPLES events
Posted by Adrian Hunter 3 years, 7 months ago
On 1/09/22 00:03, Namhyung Kim wrote:
> When there are lost samples, it can read the number of PERF_FORMAT_LOST and
> convert it to PERF_RECORD_LOST_SAMPLES and write to the data file at the end.
> 
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/builtin-record.c | 60 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 60 insertions(+)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index bce8c941d558..cb9881543a07 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -10,6 +10,7 @@
>  
>  #include "util/build-id.h"
>  #include <subcmd/parse-options.h>
> +#include <internal/xyarray.h>
>  #include "util/parse-events.h"
>  #include "util/config.h"
>  
> @@ -1852,6 +1853,64 @@ record__switch_output(struct record *rec, bool at_exit)
>  	return fd;
>  }
>  
> +static void __record__read_lost_samples(struct record *rec, struct evsel *evsel,
> +					struct perf_record_lost_samples *lost,
> +					int size, int cpu_idx, int thread_idx)
> +{
> +	struct perf_counts_values count;
> +	struct perf_sample_id *sid;
> +	struct perf_sample sample = {};
> +
> +	if (perf_evsel__read(&evsel->core, cpu_idx, thread_idx, &count) < 0) {
> +		pr_err("read LOST count failed\n");
> +		return;
> +	}
> +
> +	if (count.lost == 0)
> +		return;
> +
> +	lost->lost = count.lost;
> +	if (evsel->core.ids) {
> +		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
> +		sample.id = sid->id;
> +	}
> +
> +	perf_event__synthesize_id_sample((void *)(lost + 1),
> +					 evsel->core.attr.sample_type, &sample);

The size of the ID sample depends on each evsel's sample_type, so it is not
necessarily the same as machine->id_hdr_size.

Setting the header size from the value returned by
perf_event__synthesize_id_sample() might be more robust:

	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;

> +	record__write(rec, NULL, lost, size);
> +}
> +
> +static void record__read_lost_samples(struct record *rec)
> +{
> +	struct perf_session *session = rec->session;
> +	struct machine *machine = &session->machines.host;
> +	struct perf_record_lost_samples *lost;
> +	struct evsel *evsel;
> +	int size = sizeof(*lost) + machine->id_hdr_size;

Drop the precomputed size:

  -	int size = sizeof(*lost) + machine->id_hdr_size;

> +
> +	lost = zalloc(size);

Allocate a buffer large enough for any ID sample instead:

	lost = zalloc(PERF_SAMPLE_MAX_SIZE);


> +	lost->header.type = PERF_RECORD_LOST_SAMPLES;
> +	lost->header.size = size;

Drop this as well, since header.size is then set for each event as suggested above:

  -	lost->header.size = size;

> +
> +	evlist__for_each_entry(session->evlist, evsel) {
> +		struct xyarray *xy = evsel->core.sample_id;
> +
> +		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
> +		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
> +			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
> +			continue;
> +		}
> +
> +		for (int x = 0; x < xyarray__max_x(xy); x++) {
> +			for (int y = 0; y < xyarray__max_y(xy); y++) {
> +				__record__read_lost_samples(rec, evsel, lost,
> +							    size, x, y);
> +			}
> +		}
> +	}
> +
> +}
> +
>  static volatile int workload_exec_errno;
>  
>  /*
> @@ -2710,6 +2769,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>  	if (rec->off_cpu)
>  		rec->bytes_written += off_cpu_write(rec->session);
>  
> +	record__read_lost_samples(rec);
>  	record__synthesize(rec, true);
>  	/* this will be recalculated during process_buildids() */
>  	rec->samples = 0;
Re: [PATCH 3/5] perf record: Read and inject LOST_SAMPLES events
Posted by Namhyung Kim 3 years, 7 months ago
Hi Adrian,

On Thu, Sep 1, 2022 at 4:23 AM Adrian Hunter <adrian.hunter@intel.com> wrote:
>
> On 1/09/22 00:03, Namhyung Kim wrote:
> > When there are lost samples, it can read the number of PERF_FORMAT_LOST and
> > convert it to PERF_RECORD_LOST_SAMPLES and write to the data file at the end.
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> >  tools/perf/builtin-record.c | 60 +++++++++++++++++++++++++++++++++++++
> >  1 file changed, 60 insertions(+)
> >
> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > index bce8c941d558..cb9881543a07 100644
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
> > @@ -10,6 +10,7 @@
> >
> >  #include "util/build-id.h"
> >  #include <subcmd/parse-options.h>
> > +#include <internal/xyarray.h>
> >  #include "util/parse-events.h"
> >  #include "util/config.h"
> >
> > @@ -1852,6 +1853,64 @@ record__switch_output(struct record *rec, bool at_exit)
> >       return fd;
> >  }
> >
> > +static void __record__read_lost_samples(struct record *rec, struct evsel *evsel,
> > +                                     struct perf_record_lost_samples *lost,
> > +                                     int size, int cpu_idx, int thread_idx)
> > +{
> > +     struct perf_counts_values count;
> > +     struct perf_sample_id *sid;
> > +     struct perf_sample sample = {};
> > +
> > +     if (perf_evsel__read(&evsel->core, cpu_idx, thread_idx, &count) < 0) {
> > +             pr_err("read LOST count failed\n");
> > +             return;
> > +     }
> > +
> > +     if (count.lost == 0)
> > +             return;
> > +
> > +     lost->lost = count.lost;
> > +     if (evsel->core.ids) {
> > +             sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
> > +             sample.id = sid->id;
> > +     }
> > +
> > +     perf_event__synthesize_id_sample((void *)(lost + 1),
> > +                                      evsel->core.attr.sample_type, &sample);
>
> The ID sample size can vary with sample_type and is not necessarily the same as
> machine->id_hdr_size.
>
> The following might be more robust:
>
>         id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), evsel->core.attr.sample_type, &sample);
>         lost->header.size = sizeof(*lost) + id_hdr_size;

Will change.

Thanks for your review!
Namhyung


>
> > +     record__write(rec, NULL, lost, size);
> > +}
> > +
> > +static void record__read_lost_samples(struct record *rec)
> > +{
> > +     struct perf_session *session = rec->session;
> > +     struct machine *machine = &session->machines.host;
> > +     struct perf_record_lost_samples *lost;
> > +     struct evsel *evsel;
> > +     int size = sizeof(*lost) + machine->id_hdr_size;
>
>   -     int size = sizeof(*lost) + machine->id_hdr_size;
>
> > +
> > +     lost = zalloc(size);
>
>         lost = zalloc(PERF_SAMPLE_MAX_SIZE);
>
>
> > +     lost->header.type = PERF_RECORD_LOST_SAMPLES;
> > +     lost->header.size = size;
>
>   -     lost->header.size = size;
>
> > +
> > +     evlist__for_each_entry(session->evlist, evsel) {
> > +             struct xyarray *xy = evsel->core.sample_id;
> > +
> > +             if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
> > +                 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
> > +                     pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
> > +                     continue;
> > +             }
> > +
> > +             for (int x = 0; x < xyarray__max_x(xy); x++) {
> > +                     for (int y = 0; y < xyarray__max_y(xy); y++) {
> > +                             __record__read_lost_samples(rec, evsel, lost,
> > +                                                         size, x, y);
> > +                     }
> > +             }
> > +     }
> > +
> > +}
> > +
> >  static volatile int workload_exec_errno;
> >
> >  /*
> > @@ -2710,6 +2769,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
> >       if (rec->off_cpu)
> >               rec->bytes_written += off_cpu_write(rec->session);
> >
> > +     record__read_lost_samples(rec);
> >       record__synthesize(rec, true);
> >       /* this will be recalculated during process_buildids() */
> >       rec->samples = 0;
>