Rather than using the first evsel in the matched events, try to find
the least shared non-tool evsel. The aim is to pick the evsel that
best typifies the metric within the list of metrics.
This addresses an issue where Default metric group metrics may lose
their counter value due to how the stat display hides counters for
default event/metric output.
For a metricgroup like TopdownL1 on an Intel Alderlake the change is as
follows. Before, there are 4 events with metrics:
```
$ perf stat -M topdownL1 -a sleep 1
Performance counter stats for 'system wide':
7,782,334,296 cpu_core/TOPDOWN.SLOTS/ # 10.4 % tma_bad_speculation
# 19.7 % tma_frontend_bound
2,668,927,977 cpu_core/topdown-retiring/ # 35.7 % tma_backend_bound
# 34.1 % tma_retiring
803,623,987 cpu_core/topdown-bad-spec/
167,514,386 cpu_core/topdown-heavy-ops/
1,555,265,776 cpu_core/topdown-fe-bound/
2,792,733,013 cpu_core/topdown-be-bound/
279,769,310 cpu_atom/TOPDOWN_RETIRING.ALL/ # 12.2 % tma_retiring
# 15.1 % tma_bad_speculation
457,917,232 cpu_atom/CPU_CLK_UNHALTED.CORE/ # 38.4 % tma_backend_bound
# 34.2 % tma_frontend_bound
783,519,226 cpu_atom/TOPDOWN_FE_BOUND.ALL/
10,790,192 cpu_core/INT_MISC.UOP_DROPPING/
879,845,633 cpu_atom/TOPDOWN_BE_BOUND.ALL/
```
After, there are 6 events with metrics:
```
$ perf stat -M topdownL1 -a sleep 1
Performance counter stats for 'system wide':
2,377,551,258 cpu_core/TOPDOWN.SLOTS/ # 7.9 % tma_bad_speculation
# 36.4 % tma_frontend_bound
480,791,142 cpu_core/topdown-retiring/ # 35.5 % tma_backend_bound
186,323,991 cpu_core/topdown-bad-spec/
65,070,590 cpu_core/topdown-heavy-ops/ # 20.1 % tma_retiring
871,733,444 cpu_core/topdown-fe-bound/
848,286,598 cpu_core/topdown-be-bound/
260,936,456 cpu_atom/TOPDOWN_RETIRING.ALL/ # 12.4 % tma_retiring
# 17.6 % tma_bad_speculation
419,576,513 cpu_atom/CPU_CLK_UNHALTED.CORE/
797,132,597 cpu_atom/TOPDOWN_FE_BOUND.ALL/ # 38.0 % tma_frontend_bound
3,055,447 cpu_core/INT_MISC.UOP_DROPPING/
671,014,164 cpu_atom/TOPDOWN_BE_BOUND.ALL/ # 32.0 % tma_backend_bound
```
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/metricgroup.c | 48 ++++++++++++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 48936e517803..76092ee26761 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1323,6 +1323,51 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu,
return ret;
}
+/* How many times will a given evsel be used in a set of metrics? */
+static int count_uses(struct list_head *metric_list, struct evsel *evsel)
+{
+ const char *metric_id = evsel__metric_id(evsel);
+ struct metric *m;
+ int uses = 0;
+
+ list_for_each_entry(m, metric_list, nd) {
+ if (hashmap__find(m->pctx->ids, metric_id, NULL))
+ uses++;
+ }
+ return uses;
+}
+
+/*
+ * Select the evsel that stat-display will use to trigger shadow/metric
+ * printing. Pick the least shared non-tool evsel, encouraging metrics to be
+ * with a hardware counter that is specific to them.
+ */
+static struct evsel *pick_display_evsel(struct list_head *metric_list,
+ struct evsel **metric_events)
+{
+ struct evsel *selected = metric_events[0];
+ size_t selected_uses;
+ bool selected_is_tool;
+
+ if (!selected)
+ return NULL;
+
+ selected_uses = count_uses(metric_list, selected);
+ selected_is_tool = evsel__is_tool(selected);
+ for (int i = 1; metric_events[i]; i++) {
+ struct evsel *candidate = metric_events[i];
+ size_t candidate_uses = count_uses(metric_list, candidate);
+
+ if ((selected_is_tool && !evsel__is_tool(candidate)) ||
+ (candidate_uses < selected_uses)) {
+ selected = candidate;
+ selected_uses = candidate_uses;
+ selected_is_tool = evsel__is_tool(selected);
+ }
+ }
+ return selected;
+}
+
static int parse_groups(struct evlist *perf_evlist,
const char *pmu, const char *str,
bool metric_no_group,
@@ -1430,7 +1475,8 @@ static int parse_groups(struct evlist *perf_evlist,
goto out;
}
- me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0],
+ me = metricgroup__lookup(&perf_evlist->metric_events,
+ pick_display_evsel(&metric_list, metric_events),
/*create=*/true);
expr = malloc(sizeof(struct metric_expr));
--
2.51.2.1041.gc1ab5b90ca-goog
On 11/11/2025 12:04 PM, Ian Rogers wrote:
> Rather than using the first evsel in the matched events, try to find
> the least shared non-tool evsel. The aim is to pick the first evsel
> that typifies the metric within the list of metrics.
>
> This addresses an issue where Default metric group metrics may lose
> their counter value due to how the stat displaying hides counters for
> default event/metric output.
>
> For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
> before there are 4 events with metrics:
> ```
> $ perf stat -M topdownL1 -a sleep 1
>
> Performance counter stats for 'system wide':
>
> 7,782,334,296 cpu_core/TOPDOWN.SLOTS/ # 10.4 % tma_bad_speculation
> # 19.7 % tma_frontend_bound
> 2,668,927,977 cpu_core/topdown-retiring/ # 35.7 % tma_backend_bound
> # 34.1 % tma_retiring
> 803,623,987 cpu_core/topdown-bad-spec/
> 167,514,386 cpu_core/topdown-heavy-ops/
> 1,555,265,776 cpu_core/topdown-fe-bound/
> 2,792,733,013 cpu_core/topdown-be-bound/
> 279,769,310 cpu_atom/TOPDOWN_RETIRING.ALL/ # 12.2 % tma_retiring
> # 15.1 % tma_bad_speculation
> 457,917,232 cpu_atom/CPU_CLK_UNHALTED.CORE/ # 38.4 % tma_backend_bound
> # 34.2 % tma_frontend_bound
> 783,519,226 cpu_atom/TOPDOWN_FE_BOUND.ALL/
> 10,790,192 cpu_core/INT_MISC.UOP_DROPPING/
> 879,845,633 cpu_atom/TOPDOWN_BE_BOUND.ALL/
> ```
>
> After there are 6 events with metrics:
> ```
> $ perf stat -M topdownL1 -a sleep 1
>
> Performance counter stats for 'system wide':
>
> 2,377,551,258 cpu_core/TOPDOWN.SLOTS/ # 7.9 % tma_bad_speculation
> # 36.4 % tma_frontend_bound
> 480,791,142 cpu_core/topdown-retiring/ # 35.5 % tma_backend_bound
> 186,323,991 cpu_core/topdown-bad-spec/
> 65,070,590 cpu_core/topdown-heavy-ops/ # 20.1 % tma_retiring
> 871,733,444 cpu_core/topdown-fe-bound/
> 848,286,598 cpu_core/topdown-be-bound/
> 260,936,456 cpu_atom/TOPDOWN_RETIRING.ALL/ # 12.4 % tma_retiring
> # 17.6 % tma_bad_speculation
> 419,576,513 cpu_atom/CPU_CLK_UNHALTED.CORE/
> 797,132,597 cpu_atom/TOPDOWN_FE_BOUND.ALL/ # 38.0 % tma_frontend_bound
> 3,055,447 cpu_core/INT_MISC.UOP_DROPPING/
> 671,014,164 cpu_atom/TOPDOWN_BE_BOUND.ALL/ # 32.0 % tma_backend_bound
> ```
It looks like the output of the cpu_core and cpu_atom events is mixed
together, e.g. "cpu_core/INT_MISC.UOP_DROPPING/". Could we re-sort the
events and separate the cpu_core and cpu_atom event output? It would make
the output more readable. Thanks.
On Tue, Nov 11, 2025 at 12:15 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>
>
> On 11/11/2025 12:04 PM, Ian Rogers wrote:
> > [...]
>
> It looks the output of cpu_core and cpu_atom events are mixed together,
> like the "cpu_core/INT_MISC.UOP_DROPPING/". Could we resort the events and
> separate the cpu_core and cpu_atom events output? It would make the output
> more read-friendly. Thanks.
So the metrics are tagged so as not to group the events:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n117
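From memory, the relevant part of a TopdownL1 entry in that file looks
roughly like the following (abbreviated and paraphrased here, the linked
file is authoritative); the no-group tag is what keeps the metric's events
from being forced into a single group:
```
{
    "MetricExpr": "...",
    "MetricGroup": "...;TopdownL1;...",
    "MetricName": "tma_frontend_bound",
    "MetricgroupNoGroup": "TopdownL1"
},
```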
Running with each metric causes the output to be:
```
$ perf stat -M tma_bad_speculation,tma_backend_bound,tma_frontend_bound,tma_retiring -a sleep 1
Performance counter stats for 'system wide':
1,615,145,897 cpu_core/TOPDOWN.SLOTS/ # 8.1 % tma_bad_speculation
# 42.5 % tma_frontend_bound (49.89%)
243,037,087 cpu_core/topdown-retiring/ # 34.5 % tma_backend_bound (49.89%)
129,341,306 cpu_core/topdown-bad-spec/ (49.89%)
2,679,894 cpu_core/INT_MISC.UOP_DROPPING/ (49.89%)
696,940,348 cpu_core/topdown-fe-bound/ (49.89%)
563,319,011 cpu_core/topdown-be-bound/ (49.89%)
1,795,034,847 cpu_core/slots/ (50.11%)
262,140,961 cpu_core/topdown-retiring/ (50.11%)
44,589,349 cpu_core/topdown-heavy-ops/ # 14.4 % tma_retiring (50.11%)
160,987,341 cpu_core/topdown-bad-spec/ (50.11%)
778,250,364 cpu_core/topdown-fe-bound/ (50.11%)
622,499,674 cpu_core/topdown-be-bound/ (50.11%)
90,849,750 cpu_atom/TOPDOWN_RETIRING.ALL/ # 8.1 % tma_retiring
# 17.2 % tma_bad_speculation
223,878,243 cpu_atom/CPU_CLK_UNHALTED.CORE/
423,068,733 cpu_atom/TOPDOWN_FE_BOUND.ALL/ # 37.8 % tma_frontend_bound
413,413,499 cpu_atom/TOPDOWN_BE_BOUND.ALL/ # 36.9 % tma_backend_bound
```
so you can see that it is the effect of not grouping the events that
leads to the cpu_core and cpu_atom split.
The code that does sorting/fixing/adding of events, primarily to fix
topdown, is parse_events__sort_events_and_fix_groups:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/parse-events.c?h=perf-tools-next#n2030
but I've tried to make that code respect the incoming evsel list order
because if a user specifies an order then they generally expect it to
be respected (unless invalid or because of topdown events). For
--metric-only the event order doesn't really matter.
Anyway, I think trying to fix this is out of scope for this patch
series, although I agree with you about the readability. The behavior
here matches old behavior such as:
```
$ perf --version
perf version 6.16.12
$ perf stat -M TopdownL1 -a sleep 1
Performance counter stats for 'system wide':
11,086,754,658 cpu_core/TOPDOWN.SLOTS/ # 27.1 % tma_backend_bound
# 7.5 % tma_bad_speculation
# 36.5 % tma_frontend_bound
# 28.9 % tma_retiring
3,219,475,010 cpu_core/topdown-retiring/
820,655,931 cpu_core/topdown-bad-spec/
418,883,912 cpu_core/topdown-heavy-ops/
4,082,884,459 cpu_core/topdown-fe-bound/
3,012,532,414 cpu_core/topdown-be-bound/
1,030,171,196 cpu_atom/TOPDOWN_RETIRING.ALL/ # 17.4 % tma_retiring
# 16.5 % tma_bad_speculation
1,185,093,601 cpu_atom/CPU_CLK_UNHALTED.CORE/ # 29.8 % tma_backend_bound
# 36.4 % tma_frontend_bound
2,154,914,153 cpu_atom/TOPDOWN_FE_BOUND.ALL/
14,988,684 cpu_core/INT_MISC.UOP_DROPPING/
1,763,486,868 cpu_atom/TOPDOWN_BE_BOUND.ALL/
1.004103365 seconds time elapsed
```
I.e., the cpu_core and cpu_atom mixing of events isn't a regression
introduced here. There isn't a simple fix for the ordering, as we
don't want to mess up the non-metric cases. I'm happy to make a change
if you think things should be otherwise.
Thanks,
Ian
On Tue, Nov 11, 2025 at 09:20:30AM -0800, Ian Rogers wrote:
> [...]
> I.e., the cpu_core and cpu_atom mixing of events isn't a regression
> introduced here. There isn't a simple fix for the ordering, as we
> don't want to mess up the non-metric cases. I'm happy to make a change
> if you think things should be otherwise.

Agreed and it should be handled in a separate patch (series). Let's fix
problems one at a time.

Thanks,
Namhyung
On 11/12/2025 3:05 AM, Namhyung Kim wrote:
> [...]
> Agreed and it should be handled in a separate patch (series). Let's fix
> problems one at a time.

It makes sense. Thanks.