On hybrid systems, events like msr/tsc/ will aggregate counts across
all CPUs. Often metrics only want a value like msr/tsc/ for the cores
on which the metric is being computed. Listing each CPU with terms
cpu=0,cpu=1.. is laborious and would need to be encoded for all
variations of a CPU model.
Allow the cpumask from a PMU to be an argument to the cpu term. For
example in the following the cpumask of the cstate_pkg PMU selects the
CPUs to count msr/tsc/ counter upon:
```
$ cat /sys/bus/event_source/devices/cstate_pkg/cpumask
0
$ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1
Performance counter stats for 'system wide':
CPU0 252,621,253 msr/tsc,cpu=cstate_pkg/
0.101184092 seconds time elapsed
```
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/parse-events.c | 37 +++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 7a32d5234a64..ef38eb082342 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -192,10 +192,20 @@ static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head
list_for_each_entry(term, &head_terms->terms, list) {
if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) {
- struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num);
+ struct perf_cpu_map *term_cpus;
- perf_cpu_map__merge(&cpus, cpu);
- perf_cpu_map__put(cpu);
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ term_cpus = perf_cpu_map__new_int(term->val.num);
+ } else {
+ struct perf_pmu *pmu = perf_pmus__find(term->val.str);
+
+ if (perf_cpu_map__is_empty(pmu->cpus))
+ term_cpus = pmu->is_core ? cpu_map__online() : NULL;
+ else
+ term_cpus = perf_cpu_map__get(pmu->cpus);
+ }
+ perf_cpu_map__merge(&cpus, term_cpus);
+ perf_cpu_map__put(term_cpus);
}
}
@@ -1054,12 +1064,21 @@ do { \
}
break;
case PARSE_EVENTS__TERM_TYPE_CPU:
- CHECK_TYPE_VAL(NUM);
- if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
- parse_events_error__handle(err, term->err_val,
- strdup("too big"),
- NULL);
- return -EINVAL;
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("too big"),
+ /*help=*/NULL);
+ return -EINVAL;
+ }
+ } else {
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR);
+ if (perf_pmus__find(term->val.str) == NULL) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("not a valid PMU"),
+ /*help=*/NULL);
+ return -EINVAL;
+ }
}
break;
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
--
2.50.0.727.gbf7dc18ff4-goog
On Fri, Jun 27, 2025 at 12:24:09PM -0700, Ian Rogers wrote: > On hybrid systems, events like msr/tsc/ will aggregate counts across > all CPUs. Often metrics only want a value like msr/tsc/ for the cores > on which the metric is being computed. Listing each CPU with terms > cpu=0,cpu=1.. is laborious and would need to be encoded for all > variations of a CPU model. > > Allow the cpumask from a PMU to be an argument to the cpu term. For > example in the following the cpumask of the cstate_pkg PMU selects the > CPUs to count msr/tsc/ counter upon: > ``` > $ cat /sys/bus/event_source/devices/cstate_pkg/cpumask > 0 > $ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1 It can be confusing if 'cpu' takes a number or a PMU name. What about adding a new term (maybe 'cpu_from') to handle this case? Also please update the documentation. Thanks, Namhyung > > Performance counter stats for 'system wide': > > CPU0 252,621,253 msr/tsc,cpu=cstate_pkg/ > > 0.101184092 seconds time elapsed > ``` > > Signed-off-by: Ian Rogers <irogers@google.com> > --- > tools/perf/util/parse-events.c | 37 +++++++++++++++++++++++++--------- > 1 file changed, 28 insertions(+), 9 deletions(-) > > diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c > index 7a32d5234a64..ef38eb082342 100644 > --- a/tools/perf/util/parse-events.c > +++ b/tools/perf/util/parse-events.c > @@ -192,10 +192,20 @@ static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head > > list_for_each_entry(term, &head_terms->terms, list) { > if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) { > - struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num); > + struct perf_cpu_map *term_cpus; > > - perf_cpu_map__merge(&cpus, cpu); > - perf_cpu_map__put(cpu); > + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { > + term_cpus = perf_cpu_map__new_int(term->val.num); > + } else { > + struct perf_pmu *pmu = perf_pmus__find(term->val.str); > + > + if (perf_cpu_map__is_empty(pmu->cpus)) > + term_cpus = pmu->is_core ? cpu_map__online() : NULL; > + else > + term_cpus = perf_cpu_map__get(pmu->cpus); > + } > + perf_cpu_map__merge(&cpus, term_cpus); > + perf_cpu_map__put(term_cpus); > } > } > > @@ -1054,12 +1064,21 @@ do { \ > } > break; > case PARSE_EVENTS__TERM_TYPE_CPU: > - CHECK_TYPE_VAL(NUM); > - if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { > - parse_events_error__handle(err, term->err_val, > - strdup("too big"), > - NULL); > - return -EINVAL; > + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { > + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { > + parse_events_error__handle(err, term->err_val, > + strdup("too big"), > + /*help=*/NULL); > + return -EINVAL; > + } > + } else { > + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR); > + if (perf_pmus__find(term->val.str) == NULL) { > + parse_events_error__handle(err, term->err_val, > + strdup("not a valid PMU"), > + /*help=*/NULL); > + return -EINVAL; > + } > } > break; > case PARSE_EVENTS__TERM_TYPE_DRV_CFG: > -- > 2.50.0.727.gbf7dc18ff4-goog >
On Wed, Jul 16, 2025 at 1:09 PM Namhyung Kim <namhyung@kernel.org> wrote: > > On Fri, Jun 27, 2025 at 12:24:09PM -0700, Ian Rogers wrote: > > On hybrid systems, events like msr/tsc/ will aggregate counts across > > all CPUs. Often metrics only want a value like msr/tsc/ for the cores > > on which the metric is being computed. Listing each CPU with terms > > cpu=0,cpu=1.. is laborious and would need to be encoded for all > > variations of a CPU model. > > > > Allow the cpumask from a PMU to be an argument to the cpu term. For > > example in the following the cpumask of the cstate_pkg PMU selects the > > CPUs to count msr/tsc/ counter upon: > > ``` > > $ cat /sys/bus/event_source/devices/cstate_pkg/cpumask > > 0 > > $ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1 > > It can be confusing if 'cpu' takes a number or a PMU name. What about > adding a new term (maybe 'cpu_from') to handle this case? So it is possible for terms to be defined in sysfs in the 'format/' folder: ``` $ ls /sys/bus/event_source/devices/cpu_core/format/ cmask edge event frontend inv ldlat offcore_rsp pc umask ``` By not introducing a new term we leave 'cpu_from' open for use in this way. When I spoke to Kan we thought using the existing term made sense and fits the idea of leaving things open for the kernel/drivers to use. It is possible to add a new term though. Let me know and I can update the patch and documentation accordingly. Thanks, Ian > Also please update the documentation. > > Thanks, > Namhyung > > > > > Performance counter stats for 'system wide': > > > > CPU0 252,621,253 msr/tsc,cpu=cstate_pkg/ > > > > 0.101184092 seconds time elapsed > > ``` > > > > Signed-off-by: Ian Rogers <irogers@google.com> > > --- > > tools/perf/util/parse-events.c | 37 +++++++++++++++++++++++++--------- > > 1 file changed, 28 insertions(+), 9 deletions(-) > > > > diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c > > index 7a32d5234a64..ef38eb082342 100644 > > --- a/tools/perf/util/parse-events.c > > +++ b/tools/perf/util/parse-events.c > > @@ -192,10 +192,20 @@ static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head > > > > list_for_each_entry(term, &head_terms->terms, list) { > > if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) { > > - struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num); > > + struct perf_cpu_map *term_cpus; > > > > - perf_cpu_map__merge(&cpus, cpu); > > - perf_cpu_map__put(cpu); > > + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { > > + term_cpus = perf_cpu_map__new_int(term->val.num); > > + } else { > > + struct perf_pmu *pmu = perf_pmus__find(term->val.str); > > + > > + if (perf_cpu_map__is_empty(pmu->cpus)) > > + term_cpus = pmu->is_core ? cpu_map__online() : NULL; > > + else > > + term_cpus = perf_cpu_map__get(pmu->cpus); > > + } > > + perf_cpu_map__merge(&cpus, term_cpus); > > + perf_cpu_map__put(term_cpus); > > } > > } > > > > @@ -1054,12 +1064,21 @@ do { \ > > } > > break; > > case PARSE_EVENTS__TERM_TYPE_CPU: > > - CHECK_TYPE_VAL(NUM); > > - if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { > > - parse_events_error__handle(err, term->err_val, > > - strdup("too big"), > > - NULL); > > - return -EINVAL; > > + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { > > + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { > > + parse_events_error__handle(err, term->err_val, > > + strdup("too big"), > > + /*help=*/NULL); > > + return -EINVAL; > > + } > > + } else { > > + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR); > > + if (perf_pmus__find(term->val.str) == NULL) { > > + parse_events_error__handle(err, term->err_val, > > + strdup("not a valid PMU"), > > + /*help=*/NULL); > > + return -EINVAL; > > + } > > } > > break; > > case PARSE_EVENTS__TERM_TYPE_DRV_CFG: > > -- > > 2.50.0.727.gbf7dc18ff4-goog > >
Hi Ian, On Wed, Jul 16, 2025 at 01:25:17PM -0700, Ian Rogers wrote: > On Wed, Jul 16, 2025 at 1:09 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > On Fri, Jun 27, 2025 at 12:24:09PM -0700, Ian Rogers wrote: > > > On hybrid systems, events like msr/tsc/ will aggregate counts across > > > all CPUs. Often metrics only want a value like msr/tsc/ for the cores > > > on which the metric is being computed. Listing each CPU with terms > > > cpu=0,cpu=1.. is laborious and would need to be encoded for all > > > variations of a CPU model. > > > > > > Allow the cpumask from a PMU to be an argument to the cpu term. For > > > example in the following the cpumask of the cstate_pkg PMU selects the > > > CPUs to count msr/tsc/ counter upon: > > > ``` > > > $ cat /sys/bus/event_source/devices/cstate_pkg/cpumask > > > 0 > > > $ perf stat -A -e 'msr/tsc,cpu=cstate_pkg/' -a sleep 0.1 > > > > It can be confusing if 'cpu' takes a number or a PMU name. What about > > adding a new term (maybe 'cpu_from') to handle this case? > > So it is possible for terms to be defined in sysfs in the 'format/' folder: > ``` > $ ls /sys/bus/event_source/devices/cpu_core/format/ > cmask edge event frontend inv ldlat offcore_rsp pc umask > ``` > By not introducing a new term we leave 'cpu_from' open for use in this > way. When I spoke to Kan we thought using the existing term made sense > and fits the idea of leaving things open for the kernel/drivers to > use. It is possible to add a new term though. Let me know and I can > update the patch and documentation accordingly. Oh, you thought about this already. It's true that it's possible to clash with PMU formats in sysfs unless we have a separate namespace for tools somehow. But that would add (maybe unnecessary) complexity. So I'm not against with this change. I just wanted to ring an alarm for potential issues. Up to you. :) Thanks, Namhyung
© 2016 - 2025 Red Hat, Inc.