[PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric

Posted by Ian Rogers 2 months, 4 weeks ago
Rather than using the first evsel in the matched events, try to find
the least shared non-tool evsel. The aim is to pick an evsel that
typifies the metric within the list of metrics.
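
As a rough standalone sketch of the selection rule (the types and names here
are illustrative stand-ins rather than the real perf structures; the actual
implementation is pick_display_evsel() in the diff below):
```
struct ev {
	int uses;    /* number of metrics in the list referencing this event */
	int is_tool; /* tool events such as duration_time */
};

/* Prefer a non-tool event, then the event shared by the fewest metrics. */
static struct ev *pick(struct ev **evs)
{
	struct ev *sel = evs[0];

	for (int i = 1; sel && evs[i]; i++) {
		if ((sel->is_tool && !evs[i]->is_tool) ||
		    evs[i]->uses < sel->uses)
			sel = evs[i];
	}
	return sel;
}
```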

This addresses an issue where Default metric group metrics may lose
their counter value due to how the stat display hides counters for the
default event/metric output.

For a metricgroup like TopdownL1 on an Intel Alderlake the change is as
follows. Before, there are 4 events with metrics displayed against them:
```
$ perf stat -M topdownL1 -a sleep 1

 Performance counter stats for 'system wide':

     7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
                                                  #     19.7 %  tma_frontend_bound
     2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
                                                  #     34.1 %  tma_retiring
       803,623,987      cpu_core/topdown-bad-spec/
       167,514,386      cpu_core/topdown-heavy-ops/
     1,555,265,776      cpu_core/topdown-fe-bound/
     2,792,733,013      cpu_core/topdown-be-bound/
       279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
                                                  #     15.1 %  tma_bad_speculation
       457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
                                                  #     34.2 %  tma_frontend_bound
       783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
        10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
       879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
```

After, there are 6 events with metrics displayed against them:
```
$ perf stat -M topdownL1 -a sleep 1

 Performance counter stats for 'system wide':

     2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
                                                  #     36.4 %  tma_frontend_bound
       480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
       186,323,991      cpu_core/topdown-bad-spec/
        65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
       871,733,444      cpu_core/topdown-fe-bound/
       848,286,598      cpu_core/topdown-be-bound/
       260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
                                                  #     17.6 %  tma_bad_speculation
       419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
       797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
         3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
       671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
```

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/metricgroup.c | 48 ++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 48936e517803..76092ee26761 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1323,6 +1323,51 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu,
 	return ret;
 }
 
+/* How many times will a given evsel be used in a set of metrics? */
+static int count_uses(struct list_head *metric_list, struct evsel *evsel)
+{
+	const char *metric_id = evsel__metric_id(evsel);
+	struct metric *m;
+	int uses = 0;
+
+	list_for_each_entry(m, metric_list, nd) {
+		if (hashmap__find(m->pctx->ids, metric_id, NULL))
+			uses++;
+	}
+	return uses;
+}
+
+/*
+ * Select the evsel that stat-display will use to trigger shadow/metric
+ * printing. Pick the least shared non-tool evsel, encouraging metrics to be
+ * with a hardware counter that is specific to them.
+ */
+static struct evsel *pick_display_evsel(struct list_head *metric_list,
+					struct evsel **metric_events)
+{
+	struct evsel *selected = metric_events[0];
+	size_t selected_uses;
+	bool selected_is_tool;
+
+	if (!selected)
+		return NULL;
+
+	selected_uses = count_uses(metric_list, selected);
+	selected_is_tool = evsel__is_tool(selected);
+	for (int i = 1; metric_events[i]; i++) {
+		struct evsel *candidate = metric_events[i];
+		size_t candidate_uses = count_uses(metric_list, candidate);
+
+		if ((selected_is_tool && !evsel__is_tool(candidate)) ||
+		    (candidate_uses < selected_uses)) {
+			selected = candidate;
+			selected_uses = candidate_uses;
+			selected_is_tool = evsel__is_tool(selected);
+		}
+	}
+	return selected;
+}
+
 static int parse_groups(struct evlist *perf_evlist,
 			const char *pmu, const char *str,
 			bool metric_no_group,
@@ -1430,7 +1475,8 @@ static int parse_groups(struct evlist *perf_evlist,
 			goto out;
 		}
 
-		me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0],
+		me = metricgroup__lookup(&perf_evlist->metric_events,
+					 pick_display_evsel(&metric_list, metric_events),
 					 /*create=*/true);
 
 		expr = malloc(sizeof(struct metric_expr));
-- 
2.51.2.1041.gc1ab5b90ca-goog
Re: [PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Mi, Dapeng 2 months, 4 weeks ago
On 11/11/2025 12:04 PM, Ian Rogers wrote:
> Rather than using the first evsel in the matched events, try to find
> the least shared non-tool evsel. The aim is to pick the first evsel
> that typifies the metric within the list of metrics.
>
> This addresses an issue where Default metric group metrics may lose
> their counter value due to how the stat displaying hides counters for
> default event/metric output.
>
> For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
> before there are 4 events with metrics:
> ```
> $ perf stat -M topdownL1 -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>      7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
>                                                   #     19.7 %  tma_frontend_bound
>      2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
>                                                   #     34.1 %  tma_retiring
>        803,623,987      cpu_core/topdown-bad-spec/
>        167,514,386      cpu_core/topdown-heavy-ops/
>      1,555,265,776      cpu_core/topdown-fe-bound/
>      2,792,733,013      cpu_core/topdown-be-bound/
>        279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
>                                                   #     15.1 %  tma_bad_speculation
>        457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
>                                                   #     34.2 %  tma_frontend_bound
>        783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
>         10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
>        879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
> ```
>
> After there are 6 events with metrics:
> ```
> $ perf stat -M topdownL1 -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>      2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
>                                                   #     36.4 %  tma_frontend_bound
>        480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
>        186,323,991      cpu_core/topdown-bad-spec/
>         65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
>        871,733,444      cpu_core/topdown-fe-bound/
>        848,286,598      cpu_core/topdown-be-bound/
>        260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
>                                                   #     17.6 %  tma_bad_speculation
>        419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
>        797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
>          3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
>        671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
> ```

It looks like the output of the cpu_core and cpu_atom events is mixed
together, e.g. the "cpu_core/INT_MISC.UOP_DROPPING/" line. Could we re-sort
the events and separate the cpu_core and cpu_atom event output? It would
make the output more readable. Thanks.


>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/metricgroup.c | 48 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index 48936e517803..76092ee26761 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -1323,6 +1323,51 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu,
>  	return ret;
>  }
>  
> +/* How many times will a given evsel be used in a set of metrics? */
> +static int count_uses(struct list_head *metric_list, struct evsel *evsel)
> +{
> +	const char *metric_id = evsel__metric_id(evsel);
> +	struct metric *m;
> +	int uses = 0;
> +
> +	list_for_each_entry(m, metric_list, nd) {
> +		if (hashmap__find(m->pctx->ids, metric_id, NULL))
> +			uses++;
> +	}
> +	return uses;
> +}
> +
> +/*
> + * Select the evsel that stat-display will use to trigger shadow/metric
> + * printing. Pick the least shared non-tool evsel, encouraging metrics to be
> + * with a hardware counter that is specific to them.
> + */
> +static struct evsel *pick_display_evsel(struct list_head *metric_list,
> +					struct evsel **metric_events)
> +{
> +	struct evsel *selected = metric_events[0];
> +	size_t selected_uses;
> +	bool selected_is_tool;
> +
> +	if (!selected)
> +		return NULL;
> +
> +	selected_uses = count_uses(metric_list, selected);
> +	selected_is_tool = evsel__is_tool(selected);
> +	for (int i = 1; metric_events[i]; i++) {
> +		struct evsel *candidate = metric_events[i];
> +		size_t candidate_uses = count_uses(metric_list, candidate);
> +
> +		if ((selected_is_tool && !evsel__is_tool(candidate)) ||
> +		    (candidate_uses < selected_uses)) {
> +			selected = candidate;
> +			selected_uses = candidate_uses;
> +			selected_is_tool = evsel__is_tool(selected);
> +		}
> +	}
> +	return selected;
> +}
> +
>  static int parse_groups(struct evlist *perf_evlist,
>  			const char *pmu, const char *str,
>  			bool metric_no_group,
> @@ -1430,7 +1475,8 @@ static int parse_groups(struct evlist *perf_evlist,
>  			goto out;
>  		}
>  
> -		me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0],
> +		me = metricgroup__lookup(&perf_evlist->metric_events,
> +					 pick_display_evsel(&metric_list, metric_events),
>  					 /*create=*/true);
>  
>  		expr = malloc(sizeof(struct metric_expr));
Re: [PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Ian Rogers 2 months, 4 weeks ago
On Tue, Nov 11, 2025 at 12:15 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>
>
> On 11/11/2025 12:04 PM, Ian Rogers wrote:
> > Rather than using the first evsel in the matched events, try to find
> > the least shared non-tool evsel. The aim is to pick the first evsel
> > that typifies the metric within the list of metrics.
> >
> > This addresses an issue where Default metric group metrics may lose
> > their counter value due to how the stat displaying hides counters for
> > default event/metric output.
> >
> > For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
> > before there are 4 events with metrics:
> > ```
> > $ perf stat -M topdownL1 -a sleep 1
> >
> >  Performance counter stats for 'system wide':
> >
> >      7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
> >                                                   #     19.7 %  tma_frontend_bound
> >      2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
> >                                                   #     34.1 %  tma_retiring
> >        803,623,987      cpu_core/topdown-bad-spec/
> >        167,514,386      cpu_core/topdown-heavy-ops/
> >      1,555,265,776      cpu_core/topdown-fe-bound/
> >      2,792,733,013      cpu_core/topdown-be-bound/
> >        279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
> >                                                   #     15.1 %  tma_bad_speculation
> >        457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
> >                                                   #     34.2 %  tma_frontend_bound
> >        783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
> >         10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
> >        879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
> > ```
> >
> > After there are 6 events with metrics:
> > ```
> > $ perf stat -M topdownL1 -a sleep 1
> >
> >  Performance counter stats for 'system wide':
> >
> >      2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
> >                                                   #     36.4 %  tma_frontend_bound
> >        480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
> >        186,323,991      cpu_core/topdown-bad-spec/
> >         65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
> >        871,733,444      cpu_core/topdown-fe-bound/
> >        848,286,598      cpu_core/topdown-be-bound/
> >        260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
> >                                                   #     17.6 %  tma_bad_speculation
> >        419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
> >        797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
> >          3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
> >        671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
> > ```
>
> It looks the output of cpu_core and cpu_atom events are mixed together,
> like the "cpu_core/INT_MISC.UOP_DROPPING/". Could we resort the events and
> separate the cpu_core and cpu_atom events output? It would make the output
> more read-friendly. Thanks.

So the metrics are tagged so that their events are not grouped:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n117
Running with each metric named individually gives the following output:
```
$ perf stat -M tma_bad_speculation,tma_backend_bound,tma_frontend_bound,tma_retiring -a sleep 1

 Performance counter stats for 'system wide':

     1,615,145,897      cpu_core/TOPDOWN.SLOTS/          #      8.1 %  tma_bad_speculation
                                                  #     42.5 %  tma_frontend_bound       (49.89%)
       243,037,087      cpu_core/topdown-retiring/       #     34.5 %  tma_backend_bound        (49.89%)
       129,341,306      cpu_core/topdown-bad-spec/                                              (49.89%)
         2,679,894      cpu_core/INT_MISC.UOP_DROPPING/                                         (49.89%)
       696,940,348      cpu_core/topdown-fe-bound/                                              (49.89%)
       563,319,011      cpu_core/topdown-be-bound/                                              (49.89%)
     1,795,034,847      cpu_core/slots/                                                         (50.11%)
       262,140,961      cpu_core/topdown-retiring/                                              (50.11%)
        44,589,349      cpu_core/topdown-heavy-ops/      #     14.4 %  tma_retiring             (50.11%)
       160,987,341      cpu_core/topdown-bad-spec/                                              (50.11%)
       778,250,364      cpu_core/topdown-fe-bound/                                              (50.11%)
       622,499,674      cpu_core/topdown-be-bound/                                              (50.11%)
        90,849,750      cpu_atom/TOPDOWN_RETIRING.ALL/   #      8.1 %  tma_retiring
                                                  #     17.2 %  tma_bad_speculation
       223,878,243      cpu_atom/CPU_CLK_UNHALTED.CORE/
       423,068,733      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     37.8 %  tma_frontend_bound
       413,413,499      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     36.9 %  tma_backend_bound
```
so you can see that it is the effect of not grouping the events that
leads to the cpu_core and cpu_atom split.

The code that does sorting/fixing/adding of events, primarily to fix
topdown, is parse_events__sort_events_and_fix_groups:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/parse-events.c?h=perf-tools-next#n2030
but I've tried to make that code respect the incoming evsel list order
because if a user specifies an order then they generally expect it to
be respected (unless invalid or because of topdown events). For
--metric-only the event order doesn't really matter.
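
Not the actual perf code, just a minimal sketch of the general idea of
keeping the user's incoming order stable while sorting, by breaking ties on
the original list position:
```
#include <stdlib.h>

struct item {
	int key;      /* e.g. the group/PMU an event needs to sit next to */
	int orig_idx; /* position the user gave on the command line */
};

static int cmp_stable(const void *pa, const void *pb)
{
	const struct item *a = pa, *b = pb;

	if (a->key != b->key)
		return a->key - b->key;
	return a->orig_idx - b->orig_idx; /* ties keep the incoming order */
}

/* Usage: qsort(items, nr_items, sizeof(*items), cmp_stable); */
```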

Anyway, I think trying to fix this is out of scope for this patch
series, although I agree with you about the readability. The behavior
here matches old behavior such as:
```
$ perf --version
perf version 6.16.12
$ perf stat -M TopdownL1 -a sleep 1

 Performance counter stats for 'system wide':

    11,086,754,658      cpu_core/TOPDOWN.SLOTS/          #     27.1 %  tma_backend_bound
                                                  #      7.5 %  tma_bad_speculation
                                                  #     36.5 %  tma_frontend_bound
                                                  #     28.9 %  tma_retiring
     3,219,475,010      cpu_core/topdown-retiring/
       820,655,931      cpu_core/topdown-bad-spec/
       418,883,912      cpu_core/topdown-heavy-ops/
     4,082,884,459      cpu_core/topdown-fe-bound/
     3,012,532,414      cpu_core/topdown-be-bound/
     1,030,171,196      cpu_atom/TOPDOWN_RETIRING.ALL/   #     17.4 %  tma_retiring
                                                  #     16.5 %  tma_bad_speculation
     1,185,093,601      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     29.8 %  tma_backend_bound
                                                  #     36.4 %  tma_frontend_bound
     2,154,914,153      cpu_atom/TOPDOWN_FE_BOUND.ALL/
        14,988,684      cpu_core/INT_MISC.UOP_DROPPING/
     1,763,486,868      cpu_atom/TOPDOWN_BE_BOUND.ALL/

       1.004103365 seconds time elapsed
```
i.e. the cpu_core and cpu_atom mixing of events isn't a regression
introduced here. There isn't a simple fix for the ordering, as we
don't want to mess up the non-metric cases. I'm happy to make a change
if you think things should work otherwise.

Thanks,
Ian
Re: [PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Namhyung Kim 2 months, 4 weeks ago
On Tue, Nov 11, 2025 at 09:20:30AM -0800, Ian Rogers wrote:
> On Tue, Nov 11, 2025 at 12:15 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
> >
> >
> > On 11/11/2025 12:04 PM, Ian Rogers wrote:
> > > Rather than using the first evsel in the matched events, try to find
> > > the least shared non-tool evsel. The aim is to pick the first evsel
> > > that typifies the metric within the list of metrics.
> > >
> > > This addresses an issue where Default metric group metrics may lose
> > > their counter value due to how the stat displaying hides counters for
> > > default event/metric output.
> > >
> > > For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
> > > before there are 4 events with metrics:
> > > ```
> > > $ perf stat -M topdownL1 -a sleep 1
> > >
> > >  Performance counter stats for 'system wide':
> > >
> > >      7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
> > >                                                   #     19.7 %  tma_frontend_bound
> > >      2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
> > >                                                   #     34.1 %  tma_retiring
> > >        803,623,987      cpu_core/topdown-bad-spec/
> > >        167,514,386      cpu_core/topdown-heavy-ops/
> > >      1,555,265,776      cpu_core/topdown-fe-bound/
> > >      2,792,733,013      cpu_core/topdown-be-bound/
> > >        279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
> > >                                                   #     15.1 %  tma_bad_speculation
> > >        457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
> > >                                                   #     34.2 %  tma_frontend_bound
> > >        783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
> > >         10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
> > >        879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
> > > ```
> > >
> > > After there are 6 events with metrics:
> > > ```
> > > $ perf stat -M topdownL1 -a sleep 1
> > >
> > >  Performance counter stats for 'system wide':
> > >
> > >      2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
> > >                                                   #     36.4 %  tma_frontend_bound
> > >        480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
> > >        186,323,991      cpu_core/topdown-bad-spec/
> > >         65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
> > >        871,733,444      cpu_core/topdown-fe-bound/
> > >        848,286,598      cpu_core/topdown-be-bound/
> > >        260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
> > >                                                   #     17.6 %  tma_bad_speculation
> > >        419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
> > >        797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
> > >          3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
> > >        671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
> > > ```
> >
> > It looks the output of cpu_core and cpu_atom events are mixed together,
> > like the "cpu_core/INT_MISC.UOP_DROPPING/". Could we resort the events and
> > separate the cpu_core and cpu_atom events output? It would make the output
> > more read-friendly. Thanks.
> 
> So the metrics are tagged as to not group the events:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n117
> Running with each metric causes the output to be:
> ```
> $ perf stat -M tma_bad_speculation,tma_backend_bound,tma_frontend_bound,tma_retiring
> -a sleep 1
> 
>  Performance counter stats for 'system wide':
> 
>      1,615,145,897      cpu_core/TOPDOWN.SLOTS/          #      8.1 %
> tma_bad_speculation
>                                                   #     42.5 %
> tma_frontend_bound       (49.89%)
>        243,037,087      cpu_core/topdown-retiring/       #     34.5 %
> tma_backend_bound        (49.89%)
>        129,341,306      cpu_core/topdown-bad-spec/
>                          (49.89%)
>          2,679,894      cpu_core/INT_MISC.UOP_DROPPING/
>                          (49.89%)
>        696,940,348      cpu_core/topdown-fe-bound/
>                          (49.89%)
>        563,319,011      cpu_core/topdown-be-bound/
>                          (49.89%)
>      1,795,034,847      cpu_core/slots/
>                          (50.11%)
>        262,140,961      cpu_core/topdown-retiring/
>                          (50.11%)
>         44,589,349      cpu_core/topdown-heavy-ops/      #     14.4 %
> tma_retiring             (50.11%)
>        160,987,341      cpu_core/topdown-bad-spec/
>                          (50.11%)
>        778,250,364      cpu_core/topdown-fe-bound/
>                          (50.11%)
>        622,499,674      cpu_core/topdown-be-bound/
>                          (50.11%)
>         90,849,750      cpu_atom/TOPDOWN_RETIRING.ALL/   #      8.1 %
> tma_retiring
>                                                   #     17.2 %
> tma_bad_speculation
>        223,878,243      cpu_atom/CPU_CLK_UNHALTED.CORE/
>        423,068,733      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     37.8 %
> tma_frontend_bound
>        413,413,499      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     36.9 %
> tma_backend_bound
> ```
> so you can see that it is the effect of not grouping the events that
> leads to the cpu_core and cpu_atom split.
> 
> The code that does sorting/fixing/adding of events, primarily to fix
> topdown, is parse_events__sort_events_and_fix_groups:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/parse-events.c?h=perf-tools-next#n2030
> but I've tried to make that code respect the incoming evsel list order
> because if a user specifies an order then they generally expect it to
> be respected (unless invalid or because of topdown events). For
> --metric-only the event order doesn't really matter.
> 
> Anyway, I think trying to fix this is out of scope for this patch
> series, although I agree with you about the readability. The behavior
> here matches old behavior such as:
> ```
> $ perf --version
> perf version 6.16.12
> $ perf stat -M TopdownL1 -a sleep 1
> 
>  Performance counter stats for 'system wide':
> 
>     11,086,754,658      cpu_core/TOPDOWN.SLOTS/          #     27.1 %
> tma_backend_bound
>                                                   #      7.5 %
> tma_bad_speculation
>                                                   #     36.5 %
> tma_frontend_bound
>                                                   #     28.9 %
> tma_retiring
>      3,219,475,010      cpu_core/topdown-retiring/
>        820,655,931      cpu_core/topdown-bad-spec/
>        418,883,912      cpu_core/topdown-heavy-ops/
>      4,082,884,459      cpu_core/topdown-fe-bound/
>      3,012,532,414      cpu_core/topdown-be-bound/
>      1,030,171,196      cpu_atom/TOPDOWN_RETIRING.ALL/   #     17.4 %
> tma_retiring
>                                                   #     16.5 %
> tma_bad_speculation
>      1,185,093,601      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     29.8 %
> tma_backend_bound
>                                                   #     36.4 %
> tma_frontend_bound
>      2,154,914,153      cpu_atom/TOPDOWN_FE_BOUND.ALL/
>         14,988,684      cpu_core/INT_MISC.UOP_DROPPING/
>      1,763,486,868      cpu_atom/TOPDOWN_BE_BOUND.ALL/
> 
>        1.004103365 seconds time elapsed
> ```
> ie the cpu_core and cpu_atom mixing of events isn't a regression
> introduced here. There isn't a simple fix for the ordering, as we
> don't want to mess up the non-metric cases. I'm happy if you think
> things can be otherwise to make a change.

Agreed and it should be handled in a separate patch (series).  Let's fix
problems one at a time.

Thanks,
Namhyung

Re: [PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Mi, Dapeng 2 months, 4 weeks ago
On 11/12/2025 3:05 AM, Namhyung Kim wrote:
> On Tue, Nov 11, 2025 at 09:20:30AM -0800, Ian Rogers wrote:
>> On Tue, Nov 11, 2025 at 12:15 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>>>
>>> On 11/11/2025 12:04 PM, Ian Rogers wrote:
>>>> Rather than using the first evsel in the matched events, try to find
>>>> the least shared non-tool evsel. The aim is to pick the first evsel
>>>> that typifies the metric within the list of metrics.
>>>>
>>>> This addresses an issue where Default metric group metrics may lose
>>>> their counter value due to how the stat displaying hides counters for
>>>> default event/metric output.
>>>>
>>>> For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
>>>> before there are 4 events with metrics:
>>>> ```
>>>> $ perf stat -M topdownL1 -a sleep 1
>>>>
>>>>  Performance counter stats for 'system wide':
>>>>
>>>>      7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
>>>>                                                   #     19.7 %  tma_frontend_bound
>>>>      2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
>>>>                                                   #     34.1 %  tma_retiring
>>>>        803,623,987      cpu_core/topdown-bad-spec/
>>>>        167,514,386      cpu_core/topdown-heavy-ops/
>>>>      1,555,265,776      cpu_core/topdown-fe-bound/
>>>>      2,792,733,013      cpu_core/topdown-be-bound/
>>>>        279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
>>>>                                                   #     15.1 %  tma_bad_speculation
>>>>        457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
>>>>                                                   #     34.2 %  tma_frontend_bound
>>>>        783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
>>>>         10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
>>>>        879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
>>>> ```
>>>>
>>>> After there are 6 events with metrics:
>>>> ```
>>>> $ perf stat -M topdownL1 -a sleep 1
>>>>
>>>>  Performance counter stats for 'system wide':
>>>>
>>>>      2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
>>>>                                                   #     36.4 %  tma_frontend_bound
>>>>        480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
>>>>        186,323,991      cpu_core/topdown-bad-spec/
>>>>         65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
>>>>        871,733,444      cpu_core/topdown-fe-bound/
>>>>        848,286,598      cpu_core/topdown-be-bound/
>>>>        260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
>>>>                                                   #     17.6 %  tma_bad_speculation
>>>>        419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
>>>>        797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
>>>>          3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
>>>>        671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
>>>> ```
>>> It looks the output of cpu_core and cpu_atom events are mixed together,
>>> like the "cpu_core/INT_MISC.UOP_DROPPING/". Could we resort the events and
>>> separate the cpu_core and cpu_atom events output? It would make the output
>>> more read-friendly. Thanks.
>> So the metrics are tagged as to not group the events:
>> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n117
>> Running with each metric causes the output to be:
>> ```
>> $ perf stat -M tma_bad_speculation,tma_backend_bound,tma_frontend_bound,tma_retiring
>> -a sleep 1
>>
>>  Performance counter stats for 'system wide':
>>
>>      1,615,145,897      cpu_core/TOPDOWN.SLOTS/          #      8.1 %
>> tma_bad_speculation
>>                                                   #     42.5 %
>> tma_frontend_bound       (49.89%)
>>        243,037,087      cpu_core/topdown-retiring/       #     34.5 %
>> tma_backend_bound        (49.89%)
>>        129,341,306      cpu_core/topdown-bad-spec/
>>                          (49.89%)
>>          2,679,894      cpu_core/INT_MISC.UOP_DROPPING/
>>                          (49.89%)
>>        696,940,348      cpu_core/topdown-fe-bound/
>>                          (49.89%)
>>        563,319,011      cpu_core/topdown-be-bound/
>>                          (49.89%)
>>      1,795,034,847      cpu_core/slots/
>>                          (50.11%)
>>        262,140,961      cpu_core/topdown-retiring/
>>                          (50.11%)
>>         44,589,349      cpu_core/topdown-heavy-ops/      #     14.4 %
>> tma_retiring             (50.11%)
>>        160,987,341      cpu_core/topdown-bad-spec/
>>                          (50.11%)
>>        778,250,364      cpu_core/topdown-fe-bound/
>>                          (50.11%)
>>        622,499,674      cpu_core/topdown-be-bound/
>>                          (50.11%)
>>         90,849,750      cpu_atom/TOPDOWN_RETIRING.ALL/   #      8.1 %
>> tma_retiring
>>                                                   #     17.2 %
>> tma_bad_speculation
>>        223,878,243      cpu_atom/CPU_CLK_UNHALTED.CORE/
>>        423,068,733      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     37.8 %
>> tma_frontend_bound
>>        413,413,499      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     36.9 %
>> tma_backend_bound
>> ```
>> so you can see that it is the effect of not grouping the events that
>> leads to the cpu_core and cpu_atom split.
>>
>> The code that does sorting/fixing/adding of events, primarily to fix
>> topdown, is parse_events__sort_events_and_fix_groups:
>> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/parse-events.c?h=perf-tools-next#n2030
>> but I've tried to make that code respect the incoming evsel list order
>> because if a user specifies an order then they generally expect it to
>> be respected (unless invalid or because of topdown events). For
>> --metric-only the event order doesn't really matter.
>>
>> Anyway, I think trying to fix this is out of scope for this patch
>> series, although I agree with you about the readability. The behavior
>> here matches old behavior such as:
>> ```
>> $ perf --version
>> perf version 6.16.12
>> $ perf stat -M TopdownL1 -a sleep 1
>>
>>  Performance counter stats for 'system wide':
>>
>>     11,086,754,658      cpu_core/TOPDOWN.SLOTS/          #     27.1 %
>> tma_backend_bound
>>                                                   #      7.5 %
>> tma_bad_speculation
>>                                                   #     36.5 %
>> tma_frontend_bound
>>                                                   #     28.9 %
>> tma_retiring
>>      3,219,475,010      cpu_core/topdown-retiring/
>>        820,655,931      cpu_core/topdown-bad-spec/
>>        418,883,912      cpu_core/topdown-heavy-ops/
>>      4,082,884,459      cpu_core/topdown-fe-bound/
>>      3,012,532,414      cpu_core/topdown-be-bound/
>>      1,030,171,196      cpu_atom/TOPDOWN_RETIRING.ALL/   #     17.4 %
>> tma_retiring
>>                                                   #     16.5 %
>> tma_bad_speculation
>>      1,185,093,601      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     29.8 %
>> tma_backend_bound
>>                                                   #     36.4 %
>> tma_frontend_bound
>>      2,154,914,153      cpu_atom/TOPDOWN_FE_BOUND.ALL/
>>         14,988,684      cpu_core/INT_MISC.UOP_DROPPING/
>>      1,763,486,868      cpu_atom/TOPDOWN_BE_BOUND.ALL/
>>
>>        1.004103365 seconds time elapsed
>> ```
>> ie the cpu_core and cpu_atom mixing of events isn't a regression
>> introduced here. There isn't a simple fix for the ordering, as we
>> don't want to mess up the non-metric cases. I'm happy if you think
>> things can be otherwise to make a change.
> Agreed and it should be handled in a separate patch (series).  Let's fix
> problems one at a time.

It makes sense. Thanks.


>
> Thanks,
> Namhyung
>
>