[PATCH v1 04/12] perf parse-events: Allow the cpu term to be a PMU

Ian Rogers posted 12 patches 3 months, 1 week ago
There is a newer version of this series
[PATCH v1 04/12] perf parse-events: Allow the cpu term to be a PMU
Posted by Ian Rogers 3 months, 1 week ago
On hybrid systems, events like msr/tsc/ will aggregate counts across
all CPUs. Often metrics only want a value like msr/tsc/ for the cores
on which the metric is being computed. Listing each CPU with terms
cpu=0,cpu=1.. is laborious and would need to be encoded for all
variations of a CPU model.

Allow the cpumask from a PMU to be an argument to the cpu term. For
example in the following the cpumask of the cstate_pkg PMU selects the
CPUs to count msr/tsc/ counter upon:
```
$ cat /sys/bus/event_source/devices/cstate_pkg/cpumask
0
$ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1

 Performance counter stats for 'system wide':

CPU0          252,621,253      msr/tsc,cpu=cstate_pkg/

       0.101184092 seconds time elapsed
```

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/parse-events.c | 37 +++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 7a32d5234a64..ef38eb082342 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -192,10 +192,20 @@ static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head
 
 	list_for_each_entry(term, &head_terms->terms, list) {
 		if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) {
-			struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num);
+			struct perf_cpu_map *term_cpus;
 
-			perf_cpu_map__merge(&cpus, cpu);
-			perf_cpu_map__put(cpu);
+			if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+				term_cpus = perf_cpu_map__new_int(term->val.num);
+			} else {
+				struct perf_pmu *pmu = perf_pmus__find(term->val.str);
+
+				if (perf_cpu_map__is_empty(pmu->cpus))
+					term_cpus = pmu->is_core ? cpu_map__online() : NULL;
+				else
+					term_cpus = perf_cpu_map__get(pmu->cpus);
+			}
+			perf_cpu_map__merge(&cpus, term_cpus);
+			perf_cpu_map__put(term_cpus);
 		}
 	}
 
@@ -1054,12 +1064,21 @@ do {									   \
 		}
 		break;
 	case PARSE_EVENTS__TERM_TYPE_CPU:
-		CHECK_TYPE_VAL(NUM);
-		if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
-			parse_events_error__handle(err, term->err_val,
-						strdup("too big"),
-						NULL);
-			return -EINVAL;
+		if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+			if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
+				parse_events_error__handle(err, term->err_val,
+							strdup("too big"),
+							/*help=*/NULL);
+				return -EINVAL;
+			}
+		} else {
+			assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR);
+			if (perf_pmus__find(term->val.str) == NULL) {
+				parse_events_error__handle(err, term->err_val,
+							strdup("not a valid PMU"),
+							/*help=*/NULL);
+				return -EINVAL;
+			}
 		}
 		break;
 	case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
-- 
2.50.0.727.gbf7dc18ff4-goog
Re: [PATCH v1 04/12] perf parse-events: Allow the cpu term to be a PMU
Posted by Namhyung Kim 2 months, 3 weeks ago
On Fri, Jun 27, 2025 at 12:24:09PM -0700, Ian Rogers wrote:
> On hybrid systems, events like msr/tsc/ will aggregate counts across
> all CPUs. Often metrics only want a value like msr/tsc/ for the cores
> on which the metric is being computed. Listing each CPU with terms
> cpu=0,cpu=1.. is laborious and would need to be encoded for all
> variations of a CPU model.
> 
> Allow the cpumask from a PMU to be an argument to the cpu term. For
> example in the following the cpumask of the cstate_pkg PMU selects the
> CPUs to count msr/tsc/ counter upon:
> ```
> $ cat /sys/bus/event_source/devices/cstate_pkg/cpumask
> 0
> $ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1

It can be confusing if 'cpu' takes a number or a PMU name.  What about
adding a new term (maybe 'cpu_from') to handle this case?

Also please update the documentation.

Thanks,
Namhyung

> 
>  Performance counter stats for 'system wide':
> 
> CPU0          252,621,253      msr/tsc,cpu=cstate_pkg/
> 
>        0.101184092 seconds time elapsed
> ```
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/parse-events.c | 37 +++++++++++++++++++++++++---------
>  1 file changed, 28 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index 7a32d5234a64..ef38eb082342 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -192,10 +192,20 @@ static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head
>  
>  	list_for_each_entry(term, &head_terms->terms, list) {
>  		if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) {
> -			struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num);
> +			struct perf_cpu_map *term_cpus;
>  
> -			perf_cpu_map__merge(&cpus, cpu);
> -			perf_cpu_map__put(cpu);
> +			if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
> +				term_cpus = perf_cpu_map__new_int(term->val.num);
> +			} else {
> +				struct perf_pmu *pmu = perf_pmus__find(term->val.str);
> +
> +				if (perf_cpu_map__is_empty(pmu->cpus))
> +					term_cpus = pmu->is_core ? cpu_map__online() : NULL;
> +				else
> +					term_cpus = perf_cpu_map__get(pmu->cpus);
> +			}
> +			perf_cpu_map__merge(&cpus, term_cpus);
> +			perf_cpu_map__put(term_cpus);
>  		}
>  	}
>  
> @@ -1054,12 +1064,21 @@ do {									   \
>  		}
>  		break;
>  	case PARSE_EVENTS__TERM_TYPE_CPU:
> -		CHECK_TYPE_VAL(NUM);
> -		if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
> -			parse_events_error__handle(err, term->err_val,
> -						strdup("too big"),
> -						NULL);
> -			return -EINVAL;
> +		if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
> +			if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
> +				parse_events_error__handle(err, term->err_val,
> +							strdup("too big"),
> +							/*help=*/NULL);
> +				return -EINVAL;
> +			}
> +		} else {
> +			assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR);
> +			if (perf_pmus__find(term->val.str) == NULL) {
> +				parse_events_error__handle(err, term->err_val,
> +							strdup("not a valid PMU"),
> +							/*help=*/NULL);
> +				return -EINVAL;
> +			}
>  		}
>  		break;
>  	case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
> -- 
> 2.50.0.727.gbf7dc18ff4-goog
>
Re: [PATCH v1 04/12] perf parse-events: Allow the cpu term to be a PMU
Posted by Ian Rogers 2 months, 3 weeks ago
On Wed, Jul 16, 2025 at 1:09 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Fri, Jun 27, 2025 at 12:24:09PM -0700, Ian Rogers wrote:
> > On hybrid systems, events like msr/tsc/ will aggregate counts across
> > all CPUs. Often metrics only want a value like msr/tsc/ for the cores
> > on which the metric is being computed. Listing each CPU with terms
> > cpu=0,cpu=1.. is laborious and would need to be encoded for all
> > variations of a CPU model.
> >
> > Allow the cpumask from a PMU to be an argument to the cpu term. For
> > example in the following the cpumask of the cstate_pkg PMU selects the
> > CPUs to count msr/tsc/ counter upon:
> > ```
> > $ cat /sys/bus/event_source/devices/cstate_pkg/cpumask
> > 0
> > $ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1
>
> It can be confusing if 'cpu' takes a number or a PMU name.  What about
> adding a new term (maybe 'cpu_from') to handle this case?

So it is possible for terms to be defined in sysfs in the 'format/' folder:
```
$ ls /sys/bus/event_source/devices/cpu_core/format/
cmask  edge  event  frontend  inv  ldlat  offcore_rsp  pc  umask
```
By not introducing a new term we leave 'cpu_from' open for use in this
way. When I spoke to Kan we thought using the existing term made sense
and fits the idea of leaving things open for the kernel/drivers to
use. It is possible to add a new term though. Let me know and I can
update the patch and documentation accordingly.

Thanks,
Ian

> Also please update the documentation.
>
> Thanks,
> Namhyung
>
> >
> >  Performance counter stats for 'system wide':
> >
> > CPU0          252,621,253      msr/tsc,cpu=cstate_pkg/
> >
> >        0.101184092 seconds time elapsed
> > ```
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/util/parse-events.c | 37 +++++++++++++++++++++++++---------
> >  1 file changed, 28 insertions(+), 9 deletions(-)
> >
> > diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> > index 7a32d5234a64..ef38eb082342 100644
> > --- a/tools/perf/util/parse-events.c
> > +++ b/tools/perf/util/parse-events.c
> > @@ -192,10 +192,20 @@ static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head
> >
> >       list_for_each_entry(term, &head_terms->terms, list) {
> >               if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) {
> > -                     struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num);
> > +                     struct perf_cpu_map *term_cpus;
> >
> > -                     perf_cpu_map__merge(&cpus, cpu);
> > -                     perf_cpu_map__put(cpu);
> > +                     if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
> > +                             term_cpus = perf_cpu_map__new_int(term->val.num);
> > +                     } else {
> > +                             struct perf_pmu *pmu = perf_pmus__find(term->val.str);
> > +
> > +                             if (perf_cpu_map__is_empty(pmu->cpus))
> > +                                     term_cpus = pmu->is_core ? cpu_map__online() : NULL;
> > +                             else
> > +                                     term_cpus = perf_cpu_map__get(pmu->cpus);
> > +                     }
> > +                     perf_cpu_map__merge(&cpus, term_cpus);
> > +                     perf_cpu_map__put(term_cpus);
> >               }
> >       }
> >
> > @@ -1054,12 +1064,21 @@ do {                                                                     \
> >               }
> >               break;
> >       case PARSE_EVENTS__TERM_TYPE_CPU:
> > -             CHECK_TYPE_VAL(NUM);
> > -             if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
> > -                     parse_events_error__handle(err, term->err_val,
> > -                                             strdup("too big"),
> > -                                             NULL);
> > -                     return -EINVAL;
> > +             if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
> > +                     if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
> > +                             parse_events_error__handle(err, term->err_val,
> > +                                                     strdup("too big"),
> > +                                                     /*help=*/NULL);
> > +                             return -EINVAL;
> > +                     }
> > +             } else {
> > +                     assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR);
> > +                     if (perf_pmus__find(term->val.str) == NULL) {
> > +                             parse_events_error__handle(err, term->err_val,
> > +                                                     strdup("not a valid PMU"),
> > +                                                     /*help=*/NULL);
> > +                             return -EINVAL;
> > +                     }
> >               }
> >               break;
> >       case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
> > --
> > 2.50.0.727.gbf7dc18ff4-goog
> >
Re: [PATCH v1 04/12] perf parse-events: Allow the cpu term to be a PMU
Posted by Namhyung Kim 2 months, 3 weeks ago
Hi Ian,

On Wed, Jul 16, 2025 at 01:25:17PM -0700, Ian Rogers wrote:
> On Wed, Jul 16, 2025 at 1:09 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Fri, Jun 27, 2025 at 12:24:09PM -0700, Ian Rogers wrote:
> > > On hybrid systems, events like msr/tsc/ will aggregate counts across
> > > all CPUs. Often metrics only want a value like msr/tsc/ for the cores
> > > on which the metric is being computed. Listing each CPU with terms
> > > cpu=0,cpu=1.. is laborious and would need to be encoded for all
> > > variations of a CPU model.
> > >
> > > Allow the cpumask from a PMU to be an argument to the cpu term. For
> > > example in the following the cpumask of the cstate_pkg PMU selects the
> > > CPUs to count msr/tsc/ counter upon:
> > > ```
> > > $ cat /sys/bus/event_source/devices/cstate_pkg/cpumask
> > > 0
> > > $ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1
> >
> > It can be confusing if 'cpu' takes a number or a PMU name.  What about
> > adding a new term (maybe 'cpu_from') to handle this case?
> 
> So it is possible for terms to be defined in sysfs in the 'format/' folder:
> ```
> $ ls /sys/bus/event_source/devices/cpu_core/format/
> cmask  edge  event  frontend  inv  ldlat  offcore_rsp  pc  umask
> ```
> By not introducing a new term we leave 'cpu_from' open for use in this
> way. When I spoke to Kan we thought using the existing term made sense
> and fits the idea of leaving things open for the kernel/drivers to
> use. It is possible to add a new term though. Let me know and I can
> update the patch and documentation accordingly.

Oh, you thought about this already.  It's true that it's possible to
clash with PMU formats in sysfs unless we have a separate namespace for
tools somehow.  But that would add (maybe unnecessary) complexity.

So I'm not against with this change.  I just wanted to ring an alarm for
potential issues.  Up to you. :)

Thanks,
Namhyung