[PATCH v2 10/16] perf intel-tpebs: Add support for updating counts in evsel__tpebs_read

Posted by Ian Rogers 10 months, 1 week ago
Rename tpebs_set_evsel to evsel__tpebs_read to reflect the evsel argument
and for consistency with other tpebs functions. Update the count from
prev_raw_counts when available. Eventually this will allow interval mode
support.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/evsel.c       | 11 ++------
 tools/perf/util/intel-tpebs.c | 52 ++++++++++++++---------------------
 tools/perf/util/intel-tpebs.h |  2 +-
 3 files changed, 25 insertions(+), 40 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 554252ed1aab..1d343f51225b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1718,11 +1718,6 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
 	return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
 }
 
-static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
-{
-	return tpebs_set_evsel(evsel, cpu_map_idx, thread);
-}
-
 static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
 			     u64 val, u64 ena, u64 run, u64 lost)
 {
@@ -1730,8 +1725,8 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
 
 	count = perf_counts(counter->counts, cpu_map_idx, thread);
 
-	if (counter->retire_lat) {
-		evsel__read_retire_lat(counter, cpu_map_idx, thread);
+	if (evsel__is_retire_lat(counter)) {
+		evsel__tpebs_read(counter, cpu_map_idx, thread);
 		perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
 		return;
 	}
@@ -1889,7 +1884,7 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
 		return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread);
 
 	if (evsel__is_retire_lat(evsel))
-		return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
+		return evsel__tpebs_read(evsel, cpu_map_idx, thread);
 
 	if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
 		return evsel__read_group(evsel, cpu_map_idx, thread);
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
index e3227646a9cc..452ce3698221 100644
--- a/tools/perf/util/intel-tpebs.c
+++ b/tools/perf/util/intel-tpebs.c
@@ -415,49 +415,39 @@ int evsel__tpebs_open(struct evsel *evsel)
 	return ret;
 }
 
-
-int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread)
+int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread)
 {
-	__u64 val;
+	struct perf_counts_values *count, *old_count = NULL;
 	struct tpebs_retire_lat *t;
-	struct perf_counts_values *count;
+	uint64_t val;
+
+	/* Only set retire_latency value to the first CPU and thread. */
+	if (cpu_map_idx != 0 || thread != 0)
+		return 0;
+
+	if (evsel->prev_raw_counts)
+		old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
 
-	/* Non reitre_latency evsel should never enter this function. */
-	if (!evsel__is_retire_lat(evsel))
-		return -1;
+	count = perf_counts(evsel->counts, cpu_map_idx, thread);
 
 	/*
 	 * Need to stop the forked record to ensure get sampled data from the
 	 * PIPE to process and get non-zero retire_lat value for hybrid.
 	 */
 	tpebs_stop();
-	count = perf_counts(evsel->counts, cpu_map_idx, thread);
 
 	t = tpebs_retire_lat__find(evsel);
-
-	/* Set ena and run to non-zero */
-	count->ena = count->run = 1;
-	count->lost = 0;
-
-	if (!t) {
-		/*
-		 * Set default value or 0 when retire_latency for this event is
-		 * not found from sampling data (record_tpebs not set or 0
-		 * sample recorded).
-		 */
-		count->val = 0;
-		return 0;
+	val = rint(t->val);
+
+	if (old_count) {
+		count->val = old_count->val + val;
+		count->run = old_count->run + 1;
+		count->ena = old_count->ena + 1;
+	} else {
+		count->val = val;
+		count->run++;
+		count->ena++;
 	}
-
-	/*
-	 * Only set retire_latency value to the first CPU and thread.
-	 */
-	if (cpu_map_idx == 0 && thread == 0)
-		val = rint(t->val);
-	else
-		val = 0;
-
-	count->val = val;
 	return 0;
 }
 
diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h
index 5c671181ec60..218a82866cee 100644
--- a/tools/perf/util/intel-tpebs.h
+++ b/tools/perf/util/intel-tpebs.h
@@ -12,6 +12,6 @@ extern bool tpebs_recording;
 
 int evsel__tpebs_open(struct evsel *evsel);
 void evsel__tpebs_close(struct evsel *evsel);
-int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread);
+int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread);
 
 #endif /* __INTEL_TPEBS_H */
-- 
2.49.0.504.g3bcea36a83-goog
Re: [PATCH v2 10/16] perf intel-tpebs: Add support for updating counts in evsel__tpebs_read
Posted by Liang, Kan 10 months, 1 week ago

On 2025-04-07 1:00 a.m., Ian Rogers wrote:
> Rename to reflect evsel argument and for consistency with other tpebs
> functions. Update count from prev_raw_counts when
> available. Eventually this will allow interval mode support.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/evsel.c       | 11 ++------
>  tools/perf/util/intel-tpebs.c | 52 ++++++++++++++---------------------
>  tools/perf/util/intel-tpebs.h |  2 +-
>  3 files changed, 25 insertions(+), 40 deletions(-)
> 
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 554252ed1aab..1d343f51225b 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1718,11 +1718,6 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
>  	return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
>  }
>  
> -static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
> -{
> -	return tpebs_set_evsel(evsel, cpu_map_idx, thread);
> -}
> -
>  static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
>  			     u64 val, u64 ena, u64 run, u64 lost)
>  {
> @@ -1730,8 +1725,8 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
>  
>  	count = perf_counts(counter->counts, cpu_map_idx, thread);
>  
> -	if (counter->retire_lat) {
> -		evsel__read_retire_lat(counter, cpu_map_idx, thread);
> +	if (evsel__is_retire_lat(counter)) {
> +		evsel__tpebs_read(counter, cpu_map_idx, thread);
>  		perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
>  		return;
>  	}
> @@ -1889,7 +1884,7 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
>  		return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread);
>  
>  	if (evsel__is_retire_lat(evsel))
> -		return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
> +		return evsel__tpebs_read(evsel, cpu_map_idx, thread);
>  
>  	if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
>  		return evsel__read_group(evsel, cpu_map_idx, thread);
> diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
> index e3227646a9cc..452ce3698221 100644
> --- a/tools/perf/util/intel-tpebs.c
> +++ b/tools/perf/util/intel-tpebs.c
> @@ -415,49 +415,39 @@ int evsel__tpebs_open(struct evsel *evsel)
>  	return ret;
>  }
>  
> -
> -int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread)
> +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread)
>  {
> -	__u64 val;
> +	struct perf_counts_values *count, *old_count = NULL;
>  	struct tpebs_retire_lat *t;
> -	struct perf_counts_values *count;
> +	uint64_t val;
> +
> +	/* Only set retire_latency value to the first CPU and thread. */
> +	if (cpu_map_idx != 0 || thread != 0)
> +		return 0;
> +
> +	if (evsel->prev_raw_counts)
> +		old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
>  
> -	/* Non reitre_latency evsel should never enter this function. */
> -	if (!evsel__is_retire_lat(evsel))
> -		return -1;
> +	count = perf_counts(evsel->counts, cpu_map_idx, thread);
>  
>  	/*
>  	 * Need to stop the forked record to ensure get sampled data from the
>  	 * PIPE to process and get non-zero retire_lat value for hybrid.
>  	 */
>  	tpebs_stop();
> -	count = perf_counts(evsel->counts, cpu_map_idx, thread);
>  
>  	t = tpebs_retire_lat__find(evsel);
> -
> -	/* Set ena and run to non-zero */
> -	count->ena = count->run = 1;
> -	count->lost = 0;
> -
> -	if (!t) {
> -		/*
> -		 * Set default value or 0 when retire_latency for this event is
> -		 * not found from sampling data (record_tpebs not set or 0
> -		 * sample recorded).
> -		 */
> -		count->val = 0;
> -		return 0;
> +	val = rint(t->val);
> +
> +	if (old_count) {
> +		count->val = old_count->val + val;
> +		count->run = old_count->run + 1;
> +		count->ena = old_count->ena + 1;
> +	} else {
> +		count->val = val;
> +		count->run++;
> +		count->ena++;
>  	}

It seems this use of prev_raw_counts also appears in other places,
e.g., hwmon_pmu. Is it possible to factor out a common function for it?

Thanks,
Kan

> -
> -	/*
> -	 * Only set retire_latency value to the first CPU and thread.
> -	 */
> -	if (cpu_map_idx == 0 && thread == 0)
> -		val = rint(t->val);
> -	else
> -		val = 0;
> -
> -	count->val = val;
>  	return 0;
>  }
>  
> diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h
> index 5c671181ec60..218a82866cee 100644
> --- a/tools/perf/util/intel-tpebs.h
> +++ b/tools/perf/util/intel-tpebs.h
> @@ -12,6 +12,6 @@ extern bool tpebs_recording;
>  
>  int evsel__tpebs_open(struct evsel *evsel);
>  void evsel__tpebs_close(struct evsel *evsel);
> -int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread);
> +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread);
>  
>  #endif /* __INTEL_TPEBS_H */
Re: [PATCH v2 10/16] perf intel-tpebs: Add support for updating counts in evsel__tpebs_read
Posted by Ian Rogers 10 months, 1 week ago
On Mon, Apr 7, 2025 at 12:37 PM Liang, Kan <kan.liang@linux.intel.com> wrote:
>
>
>
> On 2025-04-07 1:00 a.m., Ian Rogers wrote:
> > Rename to reflect evsel argument and for consistency with other tpebs
> > functions. Update count from prev_raw_counts when
> > available. Eventually this will allow interval mode support.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/util/evsel.c       | 11 ++------
> >  tools/perf/util/intel-tpebs.c | 52 ++++++++++++++---------------------
> >  tools/perf/util/intel-tpebs.h |  2 +-
> >  3 files changed, 25 insertions(+), 40 deletions(-)
> >
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index 554252ed1aab..1d343f51225b 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -1718,11 +1718,6 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
> >       return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
> >  }
> >
> > -static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
> > -{
> > -     return tpebs_set_evsel(evsel, cpu_map_idx, thread);
> > -}
> > -
> >  static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
> >                            u64 val, u64 ena, u64 run, u64 lost)
> >  {
> > @@ -1730,8 +1725,8 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
> >
> >       count = perf_counts(counter->counts, cpu_map_idx, thread);
> >
> > -     if (counter->retire_lat) {
> > -             evsel__read_retire_lat(counter, cpu_map_idx, thread);
> > +     if (evsel__is_retire_lat(counter)) {
> > +             evsel__tpebs_read(counter, cpu_map_idx, thread);
> >               perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
> >               return;
> >       }
> > @@ -1889,7 +1884,7 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
> >               return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread);
> >
> >       if (evsel__is_retire_lat(evsel))
> > -             return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
> > +             return evsel__tpebs_read(evsel, cpu_map_idx, thread);
> >
> >       if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
> >               return evsel__read_group(evsel, cpu_map_idx, thread);
> > diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
> > index e3227646a9cc..452ce3698221 100644
> > --- a/tools/perf/util/intel-tpebs.c
> > +++ b/tools/perf/util/intel-tpebs.c
> > @@ -415,49 +415,39 @@ int evsel__tpebs_open(struct evsel *evsel)
> >       return ret;
> >  }
> >
> > -
> > -int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread)
> > +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread)
> >  {
> > -     __u64 val;
> > +     struct perf_counts_values *count, *old_count = NULL;
> >       struct tpebs_retire_lat *t;
> > -     struct perf_counts_values *count;
> > +     uint64_t val;
> > +
> > +     /* Only set retire_latency value to the first CPU and thread. */
> > +     if (cpu_map_idx != 0 || thread != 0)
> > +             return 0;
> > +
> > +     if (evsel->prev_raw_counts)
> > +             old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
> >
> > -     /* Non reitre_latency evsel should never enter this function. */
> > -     if (!evsel__is_retire_lat(evsel))
> > -             return -1;
> > +     count = perf_counts(evsel->counts, cpu_map_idx, thread);
> >
> >       /*
> >        * Need to stop the forked record to ensure get sampled data from the
> >        * PIPE to process and get non-zero retire_lat value for hybrid.
> >        */
> >       tpebs_stop();
> > -     count = perf_counts(evsel->counts, cpu_map_idx, thread);
> >
> >       t = tpebs_retire_lat__find(evsel);
> > -
> > -     /* Set ena and run to non-zero */
> > -     count->ena = count->run = 1;
> > -     count->lost = 0;
> > -
> > -     if (!t) {
> > -             /*
> > -              * Set default value or 0 when retire_latency for this event is
> > -              * not found from sampling data (record_tpebs not set or 0
> > -              * sample recorded).
> > -              */
> > -             count->val = 0;
> > -             return 0;
> > +     val = rint(t->val);
> > +
> > +     if (old_count) {
> > +             count->val = old_count->val + val;
> > +             count->run = old_count->run + 1;
> > +             count->ena = old_count->ena + 1;
> > +     } else {
> > +             count->val = val;
> > +             count->run++;
> > +             count->ena++;
> >       }
>
> It seems this use of prev_raw_counts also appears in other places,
> e.g., hwmon_pmu. Is it possible to factor out a common function for it?

Thanks Kan, I also use it for the (unmerged) DRM PMU:
https://lore.kernel.org/lkml/20250403202439.57791-4-irogers@google.com/
I think adding a common function for this can be follow-up work.
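
Roughly, I'd expect such a shared helper to look something like the sketch
below (the evsel__accumulate_count name and its exact signature are only
placeholders, not something in this series):

static void evsel__accumulate_count(struct evsel *evsel, int cpu_map_idx,
				    int thread, u64 val)
{
	struct perf_counts_values *count, *old_count = NULL;

	/* Use the previous totals as the base when interval mode keeps them. */
	if (evsel->prev_raw_counts)
		old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);
	if (old_count) {
		count->val = old_count->val + val;
		count->run = old_count->run + 1;
		count->ena = old_count->ena + 1;
	} else {
		count->val = val;
		count->run++;
		count->ena++;
	}
}

tpebs, hwmon_pmu and the DRM PMU could then call it after computing their
software-derived value.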

Thanks,
Ian

> Thanks,
> Kan
>
> > -
> > -     /*
> > -      * Only set retire_latency value to the first CPU and thread.
> > -      */
> > -     if (cpu_map_idx == 0 && thread == 0)
> > -             val = rint(t->val);
> > -     else
> > -             val = 0;
> > -
> > -     count->val = val;
> >       return 0;
> >  }
> >
> > diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h
> > index 5c671181ec60..218a82866cee 100644
> > --- a/tools/perf/util/intel-tpebs.h
> > +++ b/tools/perf/util/intel-tpebs.h
> > @@ -12,6 +12,6 @@ extern bool tpebs_recording;
> >
> >  int evsel__tpebs_open(struct evsel *evsel);
> >  void evsel__tpebs_close(struct evsel *evsel);
> > -int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread);
> > +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread);
> >
> >  #endif /* __INTEL_TPEBS_H */
>