From: Weilin Wang <weilin.wang@intel.com>
In current :R parsing implementation, the parser would recognize events with
retire_latency modifier and insert them into the evlist like a normal event.
Ideally, we need to avoid counting these events.
In this commit, at the time when a retire_latency evsel is read, set the retire
latency value processed from the sampled data to count value. This sampled
retire latency value will be used for metric calculation and final event count
print out.
Signed-off-by: Weilin Wang <weilin.wang@intel.com>
---
tools/perf/arch/x86/util/evlist.c | 6 +++++
tools/perf/util/evsel.c | 44 +++++++++++++++++++++++++++++++
tools/perf/util/evsel.h | 5 ++++
3 files changed, 55 insertions(+)
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..cebdd483149e 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
return 1;
}
+ /* Retire latency event should not be group leader*/
+ if (lhs->retire_lat && !rhs->retire_lat)
+ return 1;
+ if (!lhs->retire_lat && rhs->retire_lat)
+ return -1;
+
/* Default ordering by insertion index. */
return lhs->core.idx - rhs->core.idx;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a0a8aee7d6b9..4d700338fc99 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -58,6 +58,7 @@
#include <internal/xyarray.h>
#include <internal/lib.h>
#include <internal/threadmap.h>
+#include "util/intel-tpebs.h"
#include <linux/ctype.h>
@@ -1523,6 +1524,40 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}
+static int evsel__set_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ struct perf_counts_values *count;
+ struct tpebs_retire_lat *t;
+ bool found = false;
+ __u64 val;
+
+ count = perf_counts(evsel->counts, cpu_map_idx, thread);
+
+ list_for_each_entry(t, &tpebs_results, nd) {
+ if (!strcmp(t->tpebs_name, evsel->name)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return -1;
+
+ /*
+ * Only set retire_latency value to the first CPU and thread.
+ */
+ if (cpu_map_idx == 0 && thread == 0)
+ val = t->val;
+ else
+ val = 0;
+
+ count->val = val;
+ /* Set ena and run to non-zero */
+ count->ena = count->run = 1;
+ count->lost = 0;
+ return 0;
+}
+
static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
u64 val, u64 ena, u64 run, u64 lost)
{
@@ -1530,6 +1565,12 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
count = perf_counts(counter->counts, cpu_map_idx, thread);
+ if (counter->retire_lat) {
+ evsel__set_retire_lat(counter, cpu_map_idx, thread);
+ perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
+ return;
+ }
+
count->val = val;
count->ena = ena;
count->run = run;
@@ -1778,6 +1819,9 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
if (evsel__is_tool(evsel))
return evsel__read_tool(evsel, cpu_map_idx, thread);
+ if (evsel__is_retire_lat(evsel))
+ return evsel__set_retire_lat(evsel, cpu_map_idx, thread);
+
if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
return evsel__read_group(evsel, cpu_map_idx, thread);
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index bd8e84954e34..aaf572317e92 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -303,6 +303,11 @@ static inline bool evsel__is_tool(const struct evsel *evsel)
return evsel->tool_event != PERF_TOOL_NONE;
}
+static inline bool evsel__is_retire_lat(const struct evsel *evsel)
+{
+ return evsel->retire_lat;
+}
+
const char *evsel__group_name(struct evsel *evsel);
int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
--
2.43.0
On Tue, May 21, 2024 at 10:40 AM <weilin.wang@intel.com> wrote:
>
> From: Weilin Wang <weilin.wang@intel.com>
>
> In current :R parsing implementation, the parser would recognize events with
> retire_latency modifier and insert them into the evlist like a normal event.
> Ideally, we need to avoid counting these events.
>
> In this commit, at the time when a retire_latency evsel is read, set the retire
> latency value processed from the sampled data to count value. This sampled
> retire latency value will be used for metric calculation and final event count
> print out.
I'm confused. Do you mean you don't count the event with 'R' modifier
(w/ perf stat) and just print the (average) retire latency (from perf record)?
>
> Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> ---
> tools/perf/arch/x86/util/evlist.c | 6 +++++
> tools/perf/util/evsel.c | 44 +++++++++++++++++++++++++++++++
> tools/perf/util/evsel.h | 5 ++++
> 3 files changed, 55 insertions(+)
>
> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> index b1ce0c52d88d..cebdd483149e 100644
> --- a/tools/perf/arch/x86/util/evlist.c
> +++ b/tools/perf/arch/x86/util/evlist.c
> @@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
> return 1;
> }
>
> + /* Retire latency event should not be group leader*/
Hmm.. why?
> + if (lhs->retire_lat && !rhs->retire_lat)
> + return 1;
> + if (!lhs->retire_lat && rhs->retire_lat)
> + return -1;
> +
> /* Default ordering by insertion index. */
> return lhs->core.idx - rhs->core.idx;
> }
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index a0a8aee7d6b9..4d700338fc99 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -58,6 +58,7 @@
> #include <internal/xyarray.h>
> #include <internal/lib.h>
> #include <internal/threadmap.h>
> +#include "util/intel-tpebs.h"
>
> #include <linux/ctype.h>
>
> @@ -1523,6 +1524,40 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
> return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
> }
>
> +static int evsel__set_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
> +{
> + struct perf_counts_values *count;
> + struct tpebs_retire_lat *t;
> + bool found = false;
> + __u64 val;
> +
> + count = perf_counts(evsel->counts, cpu_map_idx, thread);
> +
> + list_for_each_entry(t, &tpebs_results, nd) {
> + if (!strcmp(t->tpebs_name, evsel->name)) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (!found)
> + return -1;
> +
> + /*
> + * Only set retire_latency value to the first CPU and thread.
> + */
> + if (cpu_map_idx == 0 && thread == 0)
> + val = t->val;
> + else
> + val = 0;
> +
> + count->val = val;
> + /* Set ena and run to non-zero */
> + count->ena = count->run = 1;
> + count->lost = 0;
So here it seems you discard the actual count of the events
and replace it with the retire latency. That means you don't
need to open the event in perf stat, and probably just have a
placeholder, right?
Btw, I think it's better to move this logic to intel-tpebs.c file and
rename to tpebs_set_retire_lat().
> + return 0;
> +}
> +
> static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
> u64 val, u64 ena, u64 run, u64 lost)
> {
> @@ -1530,6 +1565,12 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
>
> count = perf_counts(counter->counts, cpu_map_idx, thread);
>
> + if (counter->retire_lat) {
if (evsel__is_retire_lat(counter)) ?
> + evsel__set_retire_lat(counter, cpu_map_idx, thread);
> + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
> + return;
> + }
> +
> count->val = val;
> count->ena = ena;
> count->run = run;
> @@ -1778,6 +1819,9 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
> if (evsel__is_tool(evsel))
> return evsel__read_tool(evsel, cpu_map_idx, thread);
>
> + if (evsel__is_retire_lat(evsel))
> + return evsel__set_retire_lat(evsel, cpu_map_idx, thread);
> +
I'm not sure if it works well with group event. Probably that's
why you wanted to prevent group leaders. But I guess you
can just check this after the PERF_FORMAT_GROUP, no?
Thanks,
Namhyung
> if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
> return evsel__read_group(evsel, cpu_map_idx, thread);
>
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index bd8e84954e34..aaf572317e92 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -303,6 +303,11 @@ static inline bool evsel__is_tool(const struct evsel *evsel)
> return evsel->tool_event != PERF_TOOL_NONE;
> }
>
> +static inline bool evsel__is_retire_lat(const struct evsel *evsel)
> +{
> + return evsel->retire_lat;
> +}
> +
> const char *evsel__group_name(struct evsel *evsel);
> int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
>
> --
> 2.43.0
>
> -----Original Message-----
> From: Namhyung Kim <namhyung@kernel.org>
> Sent: Friday, May 24, 2024 4:17 PM
> To: Wang, Weilin <weilin.wang@intel.com>
> Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> <mingo@redhat.com>; Alexander Shishkin
> <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry
> <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers,
> Caleb <caleb.biggers@intel.com>
> Subject: Re: [RFC PATCH v9 4/7] perf stat: Plugin retire_lat value from sampled
> data to evsel
>
> On Tue, May 21, 2024 at 10:40 AM <weilin.wang@intel.com> wrote:
> >
> > From: Weilin Wang <weilin.wang@intel.com>
> >
> > In current :R parsing implementation, the parser would recognize events
> with
> > retire_latency modifier and insert them into the evlist like a normal event.
> > Ideally, we need to avoid counting these events.
> >
> > In this commit, at the time when a retire_latency evsel is read, set the retire
> > latency value processed from the sampled data to count value. This sampled
> > retire latency value will be used for metric calculation and final event count
> > print out.
>
> I'm confused. Do you mean you don't count the event with 'R' modifier
> (w/ perf stat) and just print the (average) retire latency (from perf record)?
In metric formulas, event without 'R' modifier is included as a normal event already.
So we don't need to count the event that with 'R' modifier. They only need to be
sampled.
>
> >
> > Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> > ---
> > tools/perf/arch/x86/util/evlist.c | 6 +++++
> > tools/perf/util/evsel.c | 44 +++++++++++++++++++++++++++++++
> > tools/perf/util/evsel.h | 5 ++++
> > 3 files changed, 55 insertions(+)
> >
> > diff --git a/tools/perf/arch/x86/util/evlist.c
> b/tools/perf/arch/x86/util/evlist.c
> > index b1ce0c52d88d..cebdd483149e 100644
> > --- a/tools/perf/arch/x86/util/evlist.c
> > +++ b/tools/perf/arch/x86/util/evlist.c
> > @@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const
> struct evsel *rhs)
> > return 1;
> > }
> >
> > + /* Retire latency event should not be group leader*/
>
> Hmm.. why?
Because we don't want to count them. Make them the group leader would not work.
>
> > + if (lhs->retire_lat && !rhs->retire_lat)
> > + return 1;
> > + if (!lhs->retire_lat && rhs->retire_lat)
> > + return -1;
> > +
> > /* Default ordering by insertion index. */
> > return lhs->core.idx - rhs->core.idx;
> > }
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index a0a8aee7d6b9..4d700338fc99 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -58,6 +58,7 @@
> > #include <internal/xyarray.h>
> > #include <internal/lib.h>
> > #include <internal/threadmap.h>
> > +#include "util/intel-tpebs.h"
> >
> > #include <linux/ctype.h>
> >
> > @@ -1523,6 +1524,40 @@ static int evsel__read_one(struct evsel *evsel,
> int cpu_map_idx, int thread)
> > return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
> > }
> >
> > +static int evsel__set_retire_lat(struct evsel *evsel, int cpu_map_idx, int
> thread)
> > +{
> > + struct perf_counts_values *count;
> > + struct tpebs_retire_lat *t;
> > + bool found = false;
> > + __u64 val;
> > +
> > + count = perf_counts(evsel->counts, cpu_map_idx, thread);
> > +
> > + list_for_each_entry(t, &tpebs_results, nd) {
> > + if (!strcmp(t->tpebs_name, evsel->name)) {
> > + found = true;
> > + break;
> > + }
> > + }
> > +
> > + if (!found)
> > + return -1;
> > +
> > + /*
> > + * Only set retire_latency value to the first CPU and thread.
> > + */
> > + if (cpu_map_idx == 0 && thread == 0)
> > + val = t->val;
> > + else
> > + val = 0;
> > +
> > + count->val = val;
> > + /* Set ena and run to non-zero */
> > + count->ena = count->run = 1;
> > + count->lost = 0;
>
> So here it seems you discard the actual count of the events
> and replace it with the retire latency. That means you don't
> need to open the event in perf stat, and probably just have a
> placeholder, right?
>
> Btw, I think it's better to move this logic to intel-tpebs.c file and
> rename to tpebs_set_retire_lat().
Ian wants this to be here and also suggested me to rename this function to
evsel__read_retire_lat(). I'm ok with either way.
Thanks,
Weilin
>
>
> > + return 0;
> > +}
> > +
> > static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int
> thread,
> > u64 val, u64 ena, u64 run, u64 lost)
> > {
> > @@ -1530,6 +1565,12 @@ static void evsel__set_count(struct evsel
> *counter, int cpu_map_idx, int thread,
> >
> > count = perf_counts(counter->counts, cpu_map_idx, thread);
> >
> > + if (counter->retire_lat) {
>
> if (evsel__is_retire_lat(counter)) ?
>
>
> > + evsel__set_retire_lat(counter, cpu_map_idx, thread);
> > + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread,
> true);
> > + return;
> > + }
> > +
> > count->val = val;
> > count->ena = ena;
> > count->run = run;
> > @@ -1778,6 +1819,9 @@ int evsel__read_counter(struct evsel *evsel, int
> cpu_map_idx, int thread)
> > if (evsel__is_tool(evsel))
> > return evsel__read_tool(evsel, cpu_map_idx, thread);
> >
> > + if (evsel__is_retire_lat(evsel))
> > + return evsel__set_retire_lat(evsel, cpu_map_idx, thread);
> > +
>
> I'm not sure if it works well with group event. Probably that's
> why you wanted to prevent group leaders. But I guess you
> can just check this after the PERF_FORMAT_GROUP, no?
>
> Thanks,
> Namhyung
>
>
> > if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
> > return evsel__read_group(evsel, cpu_map_idx, thread);
> >
> > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > index bd8e84954e34..aaf572317e92 100644
> > --- a/tools/perf/util/evsel.h
> > +++ b/tools/perf/util/evsel.h
> > @@ -303,6 +303,11 @@ static inline bool evsel__is_tool(const struct evsel
> *evsel)
> > return evsel->tool_event != PERF_TOOL_NONE;
> > }
> >
> > +static inline bool evsel__is_retire_lat(const struct evsel *evsel)
> > +{
> > + return evsel->retire_lat;
> > +}
> > +
> > const char *evsel__group_name(struct evsel *evsel);
> > int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
> >
> > --
> > 2.43.0
> >
On Fri, May 24, 2024 at 4:52 PM Wang, Weilin <weilin.wang@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Namhyung Kim <namhyung@kernel.org>
> > Sent: Friday, May 24, 2024 4:17 PM
> > To: Wang, Weilin <weilin.wang@intel.com>
> > Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> > <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> > <mingo@redhat.com>; Alexander Shishkin
> > <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> > Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> > linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry
> > <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers,
> > Caleb <caleb.biggers@intel.com>
> > Subject: Re: [RFC PATCH v9 4/7] perf stat: Plugin retire_lat value from sampled
> > data to evsel
> >
> > On Tue, May 21, 2024 at 10:40 AM <weilin.wang@intel.com> wrote:
> > >
> > > From: Weilin Wang <weilin.wang@intel.com>
> > >
> > > In current :R parsing implementation, the parser would recognize events
> > with
> > > retire_latency modifier and insert them into the evlist like a normal event.
> > > Ideally, we need to avoid counting these events.
> > >
> > > In this commit, at the time when a retire_latency evsel is read, set the retire
> > > latency value processed from the sampled data to count value. This sampled
> > > retire latency value will be used for metric calculation and final event count
> > > print out.
> >
> > I'm confused. Do you mean you don't count the event with 'R' modifier
> > (w/ perf stat) and just print the (average) retire latency (from perf record)?
>
> In metric formulas, event without 'R' modifier is included as a normal event already.
> So we don't need to count the event that with 'R' modifier. They only need to be
> sampled.
Oh, you have the event in the metric expression twice. I thought of one.
Then IIUC the metric looks something like this.
myevent1 + (myevent2 * myevent1:R)
I think you'll have 2 myevent1 in perf stat and 1 in perf record, right?
But the second one in perf stat is never used and the value is updated
from perf record.
Then we can simply remove the event from the evlist (or replace it with
a dummy) to reduce the overheads (of open and read).
>
> >
> > >
> > > Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> > > ---
> > > tools/perf/arch/x86/util/evlist.c | 6 +++++
> > > tools/perf/util/evsel.c | 44 +++++++++++++++++++++++++++++++
> > > tools/perf/util/evsel.h | 5 ++++
> > > 3 files changed, 55 insertions(+)
> > >
> > > diff --git a/tools/perf/arch/x86/util/evlist.c
> > b/tools/perf/arch/x86/util/evlist.c
> > > index b1ce0c52d88d..cebdd483149e 100644
> > > --- a/tools/perf/arch/x86/util/evlist.c
> > > +++ b/tools/perf/arch/x86/util/evlist.c
> > > @@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const
> > struct evsel *rhs)
> > > return 1;
> > > }
> > >
> > > + /* Retire latency event should not be group leader*/
> >
> > Hmm.. why?
> Because we don't want to count them. Make them the group leader would not work.
I don't understand. You'll read the event regardless of being a
leader or not.
>
> >
> > > + if (lhs->retire_lat && !rhs->retire_lat)
> > > + return 1;
> > > + if (!lhs->retire_lat && rhs->retire_lat)
> > > + return -1;
> > > +
> > > /* Default ordering by insertion index. */
> > > return lhs->core.idx - rhs->core.idx;
> > > }
> > > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > > index a0a8aee7d6b9..4d700338fc99 100644
> > > --- a/tools/perf/util/evsel.c
> > > +++ b/tools/perf/util/evsel.c
> > > @@ -58,6 +58,7 @@
> > > #include <internal/xyarray.h>
> > > #include <internal/lib.h>
> > > #include <internal/threadmap.h>
> > > +#include "util/intel-tpebs.h"
> > >
> > > #include <linux/ctype.h>
> > >
> > > @@ -1523,6 +1524,40 @@ static int evsel__read_one(struct evsel *evsel,
> > int cpu_map_idx, int thread)
> > > return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
> > > }
> > >
> > > +static int evsel__set_retire_lat(struct evsel *evsel, int cpu_map_idx, int
> > thread)
> > > +{
> > > + struct perf_counts_values *count;
> > > + struct tpebs_retire_lat *t;
> > > + bool found = false;
> > > + __u64 val;
> > > +
> > > + count = perf_counts(evsel->counts, cpu_map_idx, thread);
> > > +
> > > + list_for_each_entry(t, &tpebs_results, nd) {
> > > + if (!strcmp(t->tpebs_name, evsel->name)) {
> > > + found = true;
> > > + break;
> > > + }
> > > + }
> > > +
> > > + if (!found)
> > > + return -1;
> > > +
> > > + /*
> > > + * Only set retire_latency value to the first CPU and thread.
> > > + */
> > > + if (cpu_map_idx == 0 && thread == 0)
> > > + val = t->val;
> > > + else
> > > + val = 0;
> > > +
> > > + count->val = val;
> > > + /* Set ena and run to non-zero */
> > > + count->ena = count->run = 1;
> > > + count->lost = 0;
> >
> > So here it seems you discard the actual count of the events
> > and replace it with the retire latency. That means you don't
> > need to open the event in perf stat, and probably just have a
> > placeholder, right?
> >
> > Btw, I think it's better to move this logic to intel-tpebs.c file and
> > rename to tpebs_set_retire_lat().
>
> Ian wants this to be here and also suggested me to rename this function to
> evsel__read_retire_lat(). I'm ok with either way.
I think it's better to have the tpebs logic together.
Thanks,
Namhyung
>
> >
> >
> > > + return 0;
> > > +}
> > > +
> > > static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int
> > thread,
> > > u64 val, u64 ena, u64 run, u64 lost)
> > > {
> > > @@ -1530,6 +1565,12 @@ static void evsel__set_count(struct evsel
> > *counter, int cpu_map_idx, int thread,
> > >
> > > count = perf_counts(counter->counts, cpu_map_idx, thread);
> > >
> > > + if (counter->retire_lat) {
> >
> > if (evsel__is_retire_lat(counter)) ?
> >
> >
> > > + evsel__set_retire_lat(counter, cpu_map_idx, thread);
> > > + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread,
> > true);
> > > + return;
> > > + }
> > > +
> > > count->val = val;
> > > count->ena = ena;
> > > count->run = run;
> > > @@ -1778,6 +1819,9 @@ int evsel__read_counter(struct evsel *evsel, int
> > cpu_map_idx, int thread)
> > > if (evsel__is_tool(evsel))
> > > return evsel__read_tool(evsel, cpu_map_idx, thread);
> > >
> > > + if (evsel__is_retire_lat(evsel))
> > > + return evsel__set_retire_lat(evsel, cpu_map_idx, thread);
> > > +
> >
> > I'm not sure if it works well with group event. Probably that's
> > why you wanted to prevent group leaders. But I guess you
> > can just check this after the PERF_FORMAT_GROUP, no?
> >
> > Thanks,
> > Namhyung
> >
> >
> > > if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
> > > return evsel__read_group(evsel, cpu_map_idx, thread);
> > >
> > > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > > index bd8e84954e34..aaf572317e92 100644
> > > --- a/tools/perf/util/evsel.h
> > > +++ b/tools/perf/util/evsel.h
> > > @@ -303,6 +303,11 @@ static inline bool evsel__is_tool(const struct evsel
> > *evsel)
> > > return evsel->tool_event != PERF_TOOL_NONE;
> > > }
> > >
> > > +static inline bool evsel__is_retire_lat(const struct evsel *evsel)
> > > +{
> > > + return evsel->retire_lat;
> > > +}
> > > +
> > > const char *evsel__group_name(struct evsel *evsel);
> > > int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
> > >
> > > --
> > > 2.43.0
> > >
On Sun, May 26, 2024 at 11:01 AM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Fri, May 24, 2024 at 4:52 PM Wang, Weilin <weilin.wang@intel.com> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Namhyung Kim <namhyung@kernel.org>
> > > Sent: Friday, May 24, 2024 4:17 PM
> > > To: Wang, Weilin <weilin.wang@intel.com>
> > > Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> > > <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> > > <mingo@redhat.com>; Alexander Shishkin
> > > <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> > > Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> > > linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry
> > > <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers,
> > > Caleb <caleb.biggers@intel.com>
> > > Subject: Re: [RFC PATCH v9 4/7] perf stat: Plugin retire_lat value from sampled
> > > data to evsel
> > >
> > > On Tue, May 21, 2024 at 10:40 AM <weilin.wang@intel.com> wrote:
> > > >
> > > > From: Weilin Wang <weilin.wang@intel.com>
> > > >
> > > > In current :R parsing implementation, the parser would recognize events
> > > with
> > > > retire_latency modifier and insert them into the evlist like a normal event.
> > > > Ideally, we need to avoid counting these events.
> > > >
> > > > In this commit, at the time when a retire_latency evsel is read, set the retire
> > > > latency value processed from the sampled data to count value. This sampled
> > > > retire latency value will be used for metric calculation and final event count
> > > > print out.
> > >
> > > I'm confused. Do you mean you don't count the event with 'R' modifier
> > > (w/ perf stat) and just print the (average) retire latency (from perf record)?
> >
> > In metric formulas, event without 'R' modifier is included as a normal event already.
> > So we don't need to count the event that with 'R' modifier. They only need to be
> > sampled.
>
> Oh, you have the event in the metric expression twice. I thought of one.
> Then IIUC the metric looks something like this.
>
> myevent1 + (myevent2 * myevent1:R)
>
> I think you'll have 2 myevent1 in perf stat and 1 in perf record, right?
> But the second one in perf stat is never used and the value is updated
> from perf record.
>
> Then we can simply remove the event from the evlist (or replace it with
> a dummy) to reduce the overheads (of open and read).
>
> >
> > >
> > > >
> > > > Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> > > > ---
> > > > tools/perf/arch/x86/util/evlist.c | 6 +++++
> > > > tools/perf/util/evsel.c | 44 +++++++++++++++++++++++++++++++
> > > > tools/perf/util/evsel.h | 5 ++++
> > > > 3 files changed, 55 insertions(+)
> > > >
> > > > diff --git a/tools/perf/arch/x86/util/evlist.c
> > > b/tools/perf/arch/x86/util/evlist.c
> > > > index b1ce0c52d88d..cebdd483149e 100644
> > > > --- a/tools/perf/arch/x86/util/evlist.c
> > > > +++ b/tools/perf/arch/x86/util/evlist.c
> > > > @@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const
> > > struct evsel *rhs)
> > > > return 1;
> > > > }
> > > >
> > > > + /* Retire latency event should not be group leader*/
> > >
> > > Hmm.. why?
> > Because we don't want to count them. Make them the group leader would not work.
>
> I don't understand. You'll read the event regardless of being a
> leader or not.
>
> >
> > >
> > > > + if (lhs->retire_lat && !rhs->retire_lat)
> > > > + return 1;
> > > > + if (!lhs->retire_lat && rhs->retire_lat)
> > > > + return -1;
> > > > +
> > > > /* Default ordering by insertion index. */
> > > > return lhs->core.idx - rhs->core.idx;
> > > > }
> > > > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > > > index a0a8aee7d6b9..4d700338fc99 100644
> > > > --- a/tools/perf/util/evsel.c
> > > > +++ b/tools/perf/util/evsel.c
> > > > @@ -58,6 +58,7 @@
> > > > #include <internal/xyarray.h>
> > > > #include <internal/lib.h>
> > > > #include <internal/threadmap.h>
> > > > +#include "util/intel-tpebs.h"
> > > >
> > > > #include <linux/ctype.h>
> > > >
> > > > @@ -1523,6 +1524,40 @@ static int evsel__read_one(struct evsel *evsel,
> > > int cpu_map_idx, int thread)
> > > > return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
> > > > }
> > > >
> > > > +static int evsel__set_retire_lat(struct evsel *evsel, int cpu_map_idx, int
> > > thread)
> > > > +{
> > > > + struct perf_counts_values *count;
> > > > + struct tpebs_retire_lat *t;
> > > > + bool found = false;
> > > > + __u64 val;
> > > > +
> > > > + count = perf_counts(evsel->counts, cpu_map_idx, thread);
> > > > +
> > > > + list_for_each_entry(t, &tpebs_results, nd) {
> > > > + if (!strcmp(t->tpebs_name, evsel->name)) {
> > > > + found = true;
> > > > + break;
> > > > + }
> > > > + }
> > > > +
> > > > + if (!found)
> > > > + return -1;
> > > > +
> > > > + /*
> > > > + * Only set retire_latency value to the first CPU and thread.
> > > > + */
> > > > + if (cpu_map_idx == 0 && thread == 0)
> > > > + val = t->val;
> > > > + else
> > > > + val = 0;
> > > > +
> > > > + count->val = val;
> > > > + /* Set ena and run to non-zero */
> > > > + count->ena = count->run = 1;
> > > > + count->lost = 0;
> > >
> > > So here it seems you discard the actual count of the events
> > > and replace it with the retire latency. That means you don't
> > > need to open the event in perf stat, and probably just have a
> > > placeholder, right?
> > >
> > > Btw, I think it's better to move this logic to intel-tpebs.c file and
> > > rename to tpebs_set_retire_lat().
> >
> > Ian wants this to be here and also suggested me to rename this function to
> > evsel__read_retire_lat(). I'm ok with either way.
>
> I think it's better to have the tpebs logic together.
I think the tpebs functions can be in a tpebs file. I'd rather have
the retirement latency events (from the metric) be evsels for a few
reasons:
1) I'd rather everything in a metric be evsels, so things like
"num_cpus_online" should really be a tool event rather than a special
literal kind of thing. I'd like to reduce special cases over time, in
part as it should help with portability. For example, if only Intel
x86 can parse :R then someone trying to parse an Intel x86 metric on
ARM may get parser errors.
2) When we change from forking perf record to directly opening a
sampling ring buffer then it makes sense that we use/update the
evsel/evlist logic.
Thanks,
Ian
> Thanks,
> Namhyung
>
> >
> > >
> > >
> > > > + return 0;
> > > > +}
> > > > +
> > > > static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int
> > > thread,
> > > > u64 val, u64 ena, u64 run, u64 lost)
> > > > {
> > > > @@ -1530,6 +1565,12 @@ static void evsel__set_count(struct evsel
> > > *counter, int cpu_map_idx, int thread,
> > > >
> > > > count = perf_counts(counter->counts, cpu_map_idx, thread);
> > > >
> > > > + if (counter->retire_lat) {
> > >
> > > if (evsel__is_retire_lat(counter)) ?
> > >
> > >
> > > > + evsel__set_retire_lat(counter, cpu_map_idx, thread);
> > > > + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread,
> > > true);
> > > > + return;
> > > > + }
> > > > +
> > > > count->val = val;
> > > > count->ena = ena;
> > > > count->run = run;
> > > > @@ -1778,6 +1819,9 @@ int evsel__read_counter(struct evsel *evsel, int
> > > cpu_map_idx, int thread)
> > > > if (evsel__is_tool(evsel))
> > > > return evsel__read_tool(evsel, cpu_map_idx, thread);
> > > >
> > > > + if (evsel__is_retire_lat(evsel))
> > > > + return evsel__set_retire_lat(evsel, cpu_map_idx, thread);
> > > > +
> > >
> > > I'm not sure if it works well with group event. Probably that's
> > > why you wanted to prevent group leaders. But I guess you
> > > can just check this after the PERF_FORMAT_GROUP, no?
> > >
> > > Thanks,
> > > Namhyung
> > >
> > >
> > > > if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
> > > > return evsel__read_group(evsel, cpu_map_idx, thread);
> > > >
> > > > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > > > index bd8e84954e34..aaf572317e92 100644
> > > > --- a/tools/perf/util/evsel.h
> > > > +++ b/tools/perf/util/evsel.h
> > > > @@ -303,6 +303,11 @@ static inline bool evsel__is_tool(const struct evsel
> > > *evsel)
> > > > return evsel->tool_event != PERF_TOOL_NONE;
> > > > }
> > > >
> > > > +static inline bool evsel__is_retire_lat(const struct evsel *evsel)
> > > > +{
> > > > + return evsel->retire_lat;
> > > > +}
> > > > +
> > > > const char *evsel__group_name(struct evsel *evsel);
> > > > int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
> > > >
> > > > --
> > > > 2.43.0
> > > >
> -----Original Message-----
> From: Namhyung Kim <namhyung@kernel.org>
> Sent: Sunday, May 26, 2024 11:02 AM
> To: Wang, Weilin <weilin.wang@intel.com>
> Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> <mingo@redhat.com>; Alexander Shishkin
> <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor, Perry
> <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>; Biggers,
> Caleb <caleb.biggers@intel.com>
> Subject: Re: [RFC PATCH v9 4/7] perf stat: Plugin retire_lat value from sampled
> data to evsel
>
> On Fri, May 24, 2024 at 4:52 PM Wang, Weilin <weilin.wang@intel.com>
> wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Namhyung Kim <namhyung@kernel.org>
> > > Sent: Friday, May 24, 2024 4:17 PM
> > > To: Wang, Weilin <weilin.wang@intel.com>
> > > Cc: Ian Rogers <irogers@google.com>; Arnaldo Carvalho de Melo
> > > <acme@kernel.org>; Peter Zijlstra <peterz@infradead.org>; Ingo Molnar
> > > <mingo@redhat.com>; Alexander Shishkin
> > > <alexander.shishkin@linux.intel.com>; Jiri Olsa <jolsa@kernel.org>; Hunter,
> > > Adrian <adrian.hunter@intel.com>; Kan Liang <kan.liang@linux.intel.com>;
> > > linux-perf-users@vger.kernel.org; linux-kernel@vger.kernel.org; Taylor,
> Perry
> > > <perry.taylor@intel.com>; Alt, Samantha <samantha.alt@intel.com>;
> Biggers,
> > > Caleb <caleb.biggers@intel.com>
> > > Subject: Re: [RFC PATCH v9 4/7] perf stat: Plugin retire_lat value from
> sampled
> > > data to evsel
> > >
> > > On Tue, May 21, 2024 at 10:40 AM <weilin.wang@intel.com> wrote:
> > > >
> > > > From: Weilin Wang <weilin.wang@intel.com>
> > > >
> > > > In current :R parsing implementation, the parser would recognize events
> > > with
> > > > retire_latency modifier and insert them into the evlist like a normal event.
> > > > Ideally, we need to avoid counting these events.
> > > >
> > > > In this commit, at the time when a retire_latency evsel is read, set the
> retire
> > > > latency value processed from the sampled data to count value. This
> sampled
> > > > retire latency value will be used for metric calculation and final event
> count
> > > > print out.
> > >
> > > I'm confused. Do you mean you don't count the event with 'R' modifier
> > > (w/ perf stat) and just print the (average) retire latency (from perf record)?
> >
> > In metric formulas, event without 'R' modifier is included as a normal event
> already.
> > So we don't need to count the event that with 'R' modifier. They only need to
> be
> > sampled.
>
> Oh, you have the event in the metric expression twice. I thought of one.
> Then IIUC the metric looks something like this.
>
> myevent1 + (myevent2 * myevent1:R)
>
> I think you'll have 2 myevent1 in perf stat and 1 in perf record, right?
> But the second one in perf stat is never used and the value is updated
> from perf record.
>
> Then we can simply remove the event from the evlist (or replace it with
> a dummy) to reduce the overheads (of open and read).
Yes, you are right. Ideally, we want to not do the extra counting on the :R event.
At the same time, I think Ian also wants to reuse code in evsel for the :R events,
so that we don't need special code to handle retire_latency value printout and
metric calculation. Therefore, I think we need to keep retire_latency events in
evlist and set processed retire_latency values to the evsel.
If we replace it with a dummy for open and read and then set retire_latency
value to it later, will it still be used in metric calculation and printout?
Thanks,
Weilin
>
> >
> > >
> > > >
> > > > Signed-off-by: Weilin Wang <weilin.wang@intel.com>
> > > > ---
> > > > tools/perf/arch/x86/util/evlist.c | 6 +++++
> > > > tools/perf/util/evsel.c | 44
> +++++++++++++++++++++++++++++++
> > > > tools/perf/util/evsel.h | 5 ++++
> > > > 3 files changed, 55 insertions(+)
> > > >
> > > > diff --git a/tools/perf/arch/x86/util/evlist.c
> > > b/tools/perf/arch/x86/util/evlist.c
> > > > index b1ce0c52d88d..cebdd483149e 100644
> > > > --- a/tools/perf/arch/x86/util/evlist.c
> > > > +++ b/tools/perf/arch/x86/util/evlist.c
> > > > @@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const
> > > struct evsel *rhs)
> > > > return 1;
> > > > }
> > > >
> > > > + /* Retire latency event should not be group leader*/
> > >
> > > Hmm.. why?
> > Because we don't want to count them. Make them the group leader would
> not work.
>
> I don't understand. You'll read the event regardless of being a
> leader or not.
>
> >
> > >
> > > > + if (lhs->retire_lat && !rhs->retire_lat)
> > > > + return 1;
> > > > + if (!lhs->retire_lat && rhs->retire_lat)
> > > > + return -1;
> > > > +
> > > > /* Default ordering by insertion index. */
> > > > return lhs->core.idx - rhs->core.idx;
> > > > }
> > > > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > > > index a0a8aee7d6b9..4d700338fc99 100644
> > > > --- a/tools/perf/util/evsel.c
> > > > +++ b/tools/perf/util/evsel.c
> > > > @@ -58,6 +58,7 @@
> > > > #include <internal/xyarray.h>
> > > > #include <internal/lib.h>
> > > > #include <internal/threadmap.h>
> > > > +#include "util/intel-tpebs.h"
> > > >
> > > > #include <linux/ctype.h>
> > > >
> > > > @@ -1523,6 +1524,40 @@ static int evsel__read_one(struct evsel
> *evsel,
> > > int cpu_map_idx, int thread)
> > > > return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
> > > > }
> > > >
> > > > +static int evsel__set_retire_lat(struct evsel *evsel, int cpu_map_idx, int
> > > thread)
> > > > +{
> > > > + struct perf_counts_values *count;
> > > > + struct tpebs_retire_lat *t;
> > > > + bool found = false;
> > > > + __u64 val;
> > > > +
> > > > + count = perf_counts(evsel->counts, cpu_map_idx, thread);
> > > > +
> > > > + list_for_each_entry(t, &tpebs_results, nd) {
> > > > + if (!strcmp(t->tpebs_name, evsel->name)) {
> > > > + found = true;
> > > > + break;
> > > > + }
> > > > + }
> > > > +
> > > > + if (!found)
> > > > + return -1;
> > > > +
> > > > + /*
> > > > + * Only set retire_latency value to the first CPU and thread.
> > > > + */
> > > > + if (cpu_map_idx == 0 && thread == 0)
> > > > + val = t->val;
> > > > + else
> > > > + val = 0;
> > > > +
> > > > + count->val = val;
> > > > + /* Set ena and run to non-zero */
> > > > + count->ena = count->run = 1;
> > > > + count->lost = 0;
> > >
> > > So here it seems you discard the actual count of the events
> > > and replace it with the retire latency. That means you don't
> > > need to open the event in perf stat, and probably just have a
> > > placeholder, right?
> > >
> > > Btw, I think it's better to move this logic to intel-tpebs.c file and
> > > rename to tpebs_set_retire_lat().
> >
> > Ian wants this to be here and also suggested me to rename this function to
> > evsel__read_retire_lat(). I'm ok with either way.
>
> I think it's better to have the tpebs logic together.
>
> Thanks,
> Namhyung
>
> >
> > >
> > >
> > > > + return 0;
> > > > +}
> > > > +
> > > > static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int
> > > thread,
> > > > u64 val, u64 ena, u64 run, u64 lost)
> > > > {
> > > > @@ -1530,6 +1565,12 @@ static void evsel__set_count(struct evsel
> > > *counter, int cpu_map_idx, int thread,
> > > >
> > > > count = perf_counts(counter->counts, cpu_map_idx, thread);
> > > >
> > > > + if (counter->retire_lat) {
> > >
> > > if (evsel__is_retire_lat(counter)) ?
> > >
> > >
> > > > + evsel__set_retire_lat(counter, cpu_map_idx, thread);
> > > > + perf_counts__set_loaded(counter->counts, cpu_map_idx,
> thread,
> > > true);
> > > > + return;
> > > > + }
> > > > +
> > > > count->val = val;
> > > > count->ena = ena;
> > > > count->run = run;
> > > > @@ -1778,6 +1819,9 @@ int evsel__read_counter(struct evsel *evsel,
> int
> > > cpu_map_idx, int thread)
> > > > if (evsel__is_tool(evsel))
> > > > return evsel__read_tool(evsel, cpu_map_idx, thread);
> > > >
> > > > + if (evsel__is_retire_lat(evsel))
> > > > + return evsel__set_retire_lat(evsel, cpu_map_idx, thread);
> > > > +
> > >
> > > I'm not sure if it works well with group event. Probably that's
> > > why you wanted to prevent group leaders. But I guess you
> > > can just check this after the PERF_FORMAT_GROUP, no?
> > >
> > > Thanks,
> > > Namhyung
> > >
> > >
> > > > if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
> > > > return evsel__read_group(evsel, cpu_map_idx, thread);
> > > >
> > > > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > > > index bd8e84954e34..aaf572317e92 100644
> > > > --- a/tools/perf/util/evsel.h
> > > > +++ b/tools/perf/util/evsel.h
> > > > @@ -303,6 +303,11 @@ static inline bool evsel__is_tool(const struct
> evsel
> > > *evsel)
> > > > return evsel->tool_event != PERF_TOOL_NONE;
> > > > }
> > > >
> > > > +static inline bool evsel__is_retire_lat(const struct evsel *evsel)
> > > > +{
> > > > + return evsel->retire_lat;
> > > > +}
> > > > +
> > > > const char *evsel__group_name(struct evsel *evsel);
> > > > int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
> > > >
> > > > --
> > > > 2.43.0
> > > >
© 2016 - 2026 Red Hat, Inc.