[PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric

Posted by Ian Rogers 2 months, 4 weeks ago
Rather than using the first evsel in the matched events, try to find
the least shared non-tool evsel. The aim is to pick an evsel that
typifies the metric within the list of metrics.
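
As a rough standalone sketch of the selection rule (the types and names here
are illustrative stand-ins rather than the real perf structures; the actual
implementation is pick_display_evsel() in the diff below):
```
struct ev {
	int uses;    /* number of metrics in the list referencing this event */
	int is_tool; /* tool events such as duration_time */
};

/* Prefer a non-tool event, then the event shared by the fewest metrics. */
static struct ev *pick(struct ev **evs)
{
	struct ev *sel = evs[0];

	for (int i = 1; sel && evs[i]; i++) {
		if ((sel->is_tool && !evs[i]->is_tool) ||
		    evs[i]->uses < sel->uses)
			sel = evs[i];
	}
	return sel;
}
```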

This addresses an issue where Default metric group metrics may lose
their counter value due to how the stat display hides counters for the
default event/metric output.

For a metricgroup like TopdownL1 on an Intel Alderlake the change is as
follows. Before, there are 4 events with metrics displayed against them:
```
$ perf stat -M topdownL1 -a sleep 1

 Performance counter stats for 'system wide':

     7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
                                                  #     19.7 %  tma_frontend_bound
     2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
                                                  #     34.1 %  tma_retiring
       803,623,987      cpu_core/topdown-bad-spec/
       167,514,386      cpu_core/topdown-heavy-ops/
     1,555,265,776      cpu_core/topdown-fe-bound/
     2,792,733,013      cpu_core/topdown-be-bound/
       279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
                                                  #     15.1 %  tma_bad_speculation
       457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
                                                  #     34.2 %  tma_frontend_bound
       783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
        10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
       879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
```

After, there are 6 events with metrics displayed against them:
```
$ perf stat -M topdownL1 -a sleep 1

 Performance counter stats for 'system wide':

     2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
                                                  #     36.4 %  tma_frontend_bound
       480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
       186,323,991      cpu_core/topdown-bad-spec/
        65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
       871,733,444      cpu_core/topdown-fe-bound/
       848,286,598      cpu_core/topdown-be-bound/
       260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
                                                  #     17.6 %  tma_bad_speculation
       419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
       797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
         3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
       671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
```

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/metricgroup.c | 48 ++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 48936e517803..76092ee26761 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1323,6 +1323,51 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu,
 	return ret;
 }
 
+/* How many times will a given evsel be used in a set of metrics? */
+static int count_uses(struct list_head *metric_list, struct evsel *evsel)
+{
+	const char *metric_id = evsel__metric_id(evsel);
+	struct metric *m;
+	int uses = 0;
+
+	list_for_each_entry(m, metric_list, nd) {
+		if (hashmap__find(m->pctx->ids, metric_id, NULL))
+			uses++;
+	}
+	return uses;
+}
+
+/*
+ * Select the evsel that stat-display will use to trigger shadow/metric
+ * printing. Pick the least shared non-tool evsel, encouraging metrics to be
+ * with a hardware counter that is specific to them.
+ */
+static struct evsel *pick_display_evsel(struct list_head *metric_list,
+					struct evsel **metric_events)
+{
+	struct evsel *selected = metric_events[0];
+	size_t selected_uses;
+	bool selected_is_tool;
+
+	if (!selected)
+		return NULL;
+
+	selected_uses = count_uses(metric_list, selected);
+	selected_is_tool = evsel__is_tool(selected);
+	for (int i = 1; metric_events[i]; i++) {
+		struct evsel *candidate = metric_events[i];
+		size_t candidate_uses = count_uses(metric_list, candidate);
+
+		if ((selected_is_tool && !evsel__is_tool(candidate)) ||
+		    (candidate_uses < selected_uses)) {
+			selected = candidate;
+			selected_uses = candidate_uses;
+			selected_is_tool = evsel__is_tool(selected);
+		}
+	}
+	return selected;
+}
+
 static int parse_groups(struct evlist *perf_evlist,
 			const char *pmu, const char *str,
 			bool metric_no_group,
@@ -1430,7 +1475,8 @@ static int parse_groups(struct evlist *perf_evlist,
 			goto out;
 		}
 
-		me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0],
+		me = metricgroup__lookup(&perf_evlist->metric_events,
+					 pick_display_evsel(&metric_list, metric_events),
 					 /*create=*/true);
 
 		expr = malloc(sizeof(struct metric_expr));
-- 
2.51.2.1041.gc1ab5b90ca-goog
Re: [PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Mi, Dapeng 2 months, 4 weeks ago
On 11/11/2025 12:04 PM, Ian Rogers wrote:
> Rather than using the first evsel in the matched events, try to find
> the least shared non-tool evsel. The aim is to pick the first evsel
> that typifies the metric within the list of metrics.
>
> This addresses an issue where Default metric group metrics may lose
> their counter value due to how the stat displaying hides counters for
> default event/metric output.
>
> For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
> before there are 4 events with metrics:
> ```
> $ perf stat -M topdownL1 -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>      7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
>                                                   #     19.7 %  tma_frontend_bound
>      2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
>                                                   #     34.1 %  tma_retiring
>        803,623,987      cpu_core/topdown-bad-spec/
>        167,514,386      cpu_core/topdown-heavy-ops/
>      1,555,265,776      cpu_core/topdown-fe-bound/
>      2,792,733,013      cpu_core/topdown-be-bound/
>        279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
>                                                   #     15.1 %  tma_bad_speculation
>        457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
>                                                   #     34.2 %  tma_frontend_bound
>        783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
>         10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
>        879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
> ```
>
> After there are 6 events with metrics:
> ```
> $ perf stat -M topdownL1 -a sleep 1
>
>  Performance counter stats for 'system wide':
>
>      2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
>                                                   #     36.4 %  tma_frontend_bound
>        480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
>        186,323,991      cpu_core/topdown-bad-spec/
>         65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
>        871,733,444      cpu_core/topdown-fe-bound/
>        848,286,598      cpu_core/topdown-be-bound/
>        260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
>                                                   #     17.6 %  tma_bad_speculation
>        419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
>        797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
>          3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
>        671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
> ```

It looks like the output of the cpu_core and cpu_atom events is mixed
together, e.g. the "cpu_core/INT_MISC.UOP_DROPPING/" line. Could we re-sort
the events and separate the cpu_core and cpu_atom event output? It would
make the output more readable. Thanks.


>
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/metricgroup.c | 48 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
> index 48936e517803..76092ee26761 100644
> --- a/tools/perf/util/metricgroup.c
> +++ b/tools/perf/util/metricgroup.c
> @@ -1323,6 +1323,51 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu,
>  	return ret;
>  }
>  
> +/* How many times will a given evsel be used in a set of metrics? */
> +static int count_uses(struct list_head *metric_list, struct evsel *evsel)
> +{
> +	const char *metric_id = evsel__metric_id(evsel);
> +	struct metric *m;
> +	int uses = 0;
> +
> +	list_for_each_entry(m, metric_list, nd) {
> +		if (hashmap__find(m->pctx->ids, metric_id, NULL))
> +			uses++;
> +	}
> +	return uses;
> +}
> +
> +/*
> + * Select the evsel that stat-display will use to trigger shadow/metric
> + * printing. Pick the least shared non-tool evsel, encouraging metrics to be
> + * with a hardware counter that is specific to them.
> + */
> +static struct evsel *pick_display_evsel(struct list_head *metric_list,
> +					struct evsel **metric_events)
> +{
> +	struct evsel *selected = metric_events[0];
> +	size_t selected_uses;
> +	bool selected_is_tool;
> +
> +	if (!selected)
> +		return NULL;
> +
> +	selected_uses = count_uses(metric_list, selected);
> +	selected_is_tool = evsel__is_tool(selected);
> +	for (int i = 1; metric_events[i]; i++) {
> +		struct evsel *candidate = metric_events[i];
> +		size_t candidate_uses = count_uses(metric_list, candidate);
> +
> +		if ((selected_is_tool && !evsel__is_tool(candidate)) ||
> +		    (candidate_uses < selected_uses)) {
> +			selected = candidate;
> +			selected_uses = candidate_uses;
> +			selected_is_tool = evsel__is_tool(selected);
> +		}
> +	}
> +	return selected;
> +}
> +
>  static int parse_groups(struct evlist *perf_evlist,
>  			const char *pmu, const char *str,
>  			bool metric_no_group,
> @@ -1430,7 +1475,8 @@ static int parse_groups(struct evlist *perf_evlist,
>  			goto out;
>  		}
>  
> -		me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0],
> +		me = metricgroup__lookup(&perf_evlist->metric_events,
> +					 pick_display_evsel(&metric_list, metric_events),
>  					 /*create=*/true);
>  
>  		expr = malloc(sizeof(struct metric_expr));
Re: [PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Ian Rogers 2 months, 4 weeks ago
On Tue, Nov 11, 2025 at 12:15 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>
>
> On 11/11/2025 12:04 PM, Ian Rogers wrote:
> > Rather than using the first evsel in the matched events, try to find
> > the least shared non-tool evsel. The aim is to pick the first evsel
> > that typifies the metric within the list of metrics.
> >
> > This addresses an issue where Default metric group metrics may lose
> > their counter value due to how the stat displaying hides counters for
> > default event/metric output.
> >
> > For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
> > before there are 4 events with metrics:
> > ```
> > $ perf stat -M topdownL1 -a sleep 1
> >
> >  Performance counter stats for 'system wide':
> >
> >      7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
> >                                                   #     19.7 %  tma_frontend_bound
> >      2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
> >                                                   #     34.1 %  tma_retiring
> >        803,623,987      cpu_core/topdown-bad-spec/
> >        167,514,386      cpu_core/topdown-heavy-ops/
> >      1,555,265,776      cpu_core/topdown-fe-bound/
> >      2,792,733,013      cpu_core/topdown-be-bound/
> >        279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
> >                                                   #     15.1 %  tma_bad_speculation
> >        457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
> >                                                   #     34.2 %  tma_frontend_bound
> >        783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
> >         10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
> >        879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
> > ```
> >
> > After there are 6 events with metrics:
> > ```
> > $ perf stat -M topdownL1 -a sleep 1
> >
> >  Performance counter stats for 'system wide':
> >
> >      2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
> >                                                   #     36.4 %  tma_frontend_bound
> >        480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
> >        186,323,991      cpu_core/topdown-bad-spec/
> >         65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
> >        871,733,444      cpu_core/topdown-fe-bound/
> >        848,286,598      cpu_core/topdown-be-bound/
> >        260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
> >                                                   #     17.6 %  tma_bad_speculation
> >        419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
> >        797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
> >          3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
> >        671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
> > ```
>
> It looks the output of cpu_core and cpu_atom events are mixed together,
> like the "cpu_core/INT_MISC.UOP_DROPPING/". Could we resort the events and
> separate the cpu_core and cpu_atom events output? It would make the output
> more read-friendly. Thanks.

So the metrics are tagged so that their events are not grouped:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n117
Running with each metric named individually gives the following output:
```
$ perf stat -M tma_bad_speculation,tma_backend_bound,tma_frontend_bound,tma_retiring -a sleep 1

 Performance counter stats for 'system wide':

     1,615,145,897      cpu_core/TOPDOWN.SLOTS/          #      8.1 %  tma_bad_speculation
                                                  #     42.5 %  tma_frontend_bound       (49.89%)
       243,037,087      cpu_core/topdown-retiring/       #     34.5 %  tma_backend_bound        (49.89%)
       129,341,306      cpu_core/topdown-bad-spec/                                              (49.89%)
         2,679,894      cpu_core/INT_MISC.UOP_DROPPING/                                         (49.89%)
       696,940,348      cpu_core/topdown-fe-bound/                                              (49.89%)
       563,319,011      cpu_core/topdown-be-bound/                                              (49.89%)
     1,795,034,847      cpu_core/slots/                                                         (50.11%)
       262,140,961      cpu_core/topdown-retiring/                                              (50.11%)
        44,589,349      cpu_core/topdown-heavy-ops/      #     14.4 %  tma_retiring             (50.11%)
       160,987,341      cpu_core/topdown-bad-spec/                                              (50.11%)
       778,250,364      cpu_core/topdown-fe-bound/                                              (50.11%)
       622,499,674      cpu_core/topdown-be-bound/                                              (50.11%)
        90,849,750      cpu_atom/TOPDOWN_RETIRING.ALL/   #      8.1 %  tma_retiring
                                                  #     17.2 %  tma_bad_speculation
       223,878,243      cpu_atom/CPU_CLK_UNHALTED.CORE/
       423,068,733      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     37.8 %  tma_frontend_bound
       413,413,499      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     36.9 %  tma_backend_bound
```
so you can see that it is the effect of not grouping the events that
leads to the cpu_core and cpu_atom split.

The code that does sorting/fixing/adding of events, primarily to fix
topdown, is parse_events__sort_events_and_fix_groups:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/parse-events.c?h=perf-tools-next#n2030
but I've tried to make that code respect the incoming evsel list order
because if a user specifies an order then they generally expect it to
be respected (unless invalid or because of topdown events). For
--metric-only the event order doesn't really matter.
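
Not the actual perf code, just a minimal sketch of the general idea of
keeping the user's incoming order stable while sorting, by breaking ties on
the original list position:
```
#include <stdlib.h>

struct item {
	int key;      /* e.g. the group/PMU an event needs to sit next to */
	int orig_idx; /* position the user gave on the command line */
};

static int cmp_stable(const void *pa, const void *pb)
{
	const struct item *a = pa, *b = pb;

	if (a->key != b->key)
		return a->key - b->key;
	return a->orig_idx - b->orig_idx; /* ties keep the incoming order */
}

/* Usage: qsort(items, nr_items, sizeof(*items), cmp_stable); */
```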

Anyway, I think trying to fix this is out of scope for this patch
series, although I agree with you about the readability. The behavior
here matches old behavior such as:
```
$ perf --version
perf version 6.16.12
$ perf stat -M TopdownL1 -a sleep 1

 Performance counter stats for 'system wide':

    11,086,754,658      cpu_core/TOPDOWN.SLOTS/          #     27.1 %  tma_backend_bound
                                                  #      7.5 %  tma_bad_speculation
                                                  #     36.5 %  tma_frontend_bound
                                                  #     28.9 %  tma_retiring
     3,219,475,010      cpu_core/topdown-retiring/
       820,655,931      cpu_core/topdown-bad-spec/
       418,883,912      cpu_core/topdown-heavy-ops/
     4,082,884,459      cpu_core/topdown-fe-bound/
     3,012,532,414      cpu_core/topdown-be-bound/
     1,030,171,196      cpu_atom/TOPDOWN_RETIRING.ALL/   #     17.4 %  tma_retiring
                                                  #     16.5 %  tma_bad_speculation
     1,185,093,601      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     29.8 %  tma_backend_bound
                                                  #     36.4 %  tma_frontend_bound
     2,154,914,153      cpu_atom/TOPDOWN_FE_BOUND.ALL/
        14,988,684      cpu_core/INT_MISC.UOP_DROPPING/
     1,763,486,868      cpu_atom/TOPDOWN_BE_BOUND.ALL/

       1.004103365 seconds time elapsed
```
i.e. the cpu_core and cpu_atom mixing of events isn't a regression
introduced here. There isn't a simple fix for the ordering, as we
don't want to mess up the non-metric cases. I'm happy to make a change
if you think things should work otherwise.

Thanks,
Ian
Re: [PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Namhyung Kim 2 months, 4 weeks ago
On Tue, Nov 11, 2025 at 09:20:30AM -0800, Ian Rogers wrote:
> On Tue, Nov 11, 2025 at 12:15 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
> >
> >
> > On 11/11/2025 12:04 PM, Ian Rogers wrote:
> > > Rather than using the first evsel in the matched events, try to find
> > > the least shared non-tool evsel. The aim is to pick the first evsel
> > > that typifies the metric within the list of metrics.
> > >
> > > This addresses an issue where Default metric group metrics may lose
> > > their counter value due to how the stat displaying hides counters for
> > > default event/metric output.
> > >
> > > For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
> > > before there are 4 events with metrics:
> > > ```
> > > $ perf stat -M topdownL1 -a sleep 1
> > >
> > >  Performance counter stats for 'system wide':
> > >
> > >      7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
> > >                                                   #     19.7 %  tma_frontend_bound
> > >      2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
> > >                                                   #     34.1 %  tma_retiring
> > >        803,623,987      cpu_core/topdown-bad-spec/
> > >        167,514,386      cpu_core/topdown-heavy-ops/
> > >      1,555,265,776      cpu_core/topdown-fe-bound/
> > >      2,792,733,013      cpu_core/topdown-be-bound/
> > >        279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
> > >                                                   #     15.1 %  tma_bad_speculation
> > >        457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
> > >                                                   #     34.2 %  tma_frontend_bound
> > >        783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
> > >         10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
> > >        879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
> > > ```
> > >
> > > After there are 6 events with metrics:
> > > ```
> > > $ perf stat -M topdownL1 -a sleep 1
> > >
> > >  Performance counter stats for 'system wide':
> > >
> > >      2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
> > >                                                   #     36.4 %  tma_frontend_bound
> > >        480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
> > >        186,323,991      cpu_core/topdown-bad-spec/
> > >         65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
> > >        871,733,444      cpu_core/topdown-fe-bound/
> > >        848,286,598      cpu_core/topdown-be-bound/
> > >        260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
> > >                                                   #     17.6 %  tma_bad_speculation
> > >        419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
> > >        797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
> > >          3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
> > >        671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
> > > ```
> >
> > It looks the output of cpu_core and cpu_atom events are mixed together,
> > like the "cpu_core/INT_MISC.UOP_DROPPING/". Could we resort the events and
> > separate the cpu_core and cpu_atom events output? It would make the output
> > more read-friendly. Thanks.
> 
> So the metrics are tagged as to not group the events:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n117
> Running with each metric causes the output to be:
> ```
> $ perf stat -M tma_bad_speculation,tma_backend_bound,tma_frontend_bound,tma_retiring
> -a sleep 1
> 
>  Performance counter stats for 'system wide':
> 
>      1,615,145,897      cpu_core/TOPDOWN.SLOTS/          #      8.1 %
> tma_bad_speculation
>                                                   #     42.5 %
> tma_frontend_bound       (49.89%)
>        243,037,087      cpu_core/topdown-retiring/       #     34.5 %
> tma_backend_bound        (49.89%)
>        129,341,306      cpu_core/topdown-bad-spec/
>                          (49.89%)
>          2,679,894      cpu_core/INT_MISC.UOP_DROPPING/
>                          (49.89%)
>        696,940,348      cpu_core/topdown-fe-bound/
>                          (49.89%)
>        563,319,011      cpu_core/topdown-be-bound/
>                          (49.89%)
>      1,795,034,847      cpu_core/slots/
>                          (50.11%)
>        262,140,961      cpu_core/topdown-retiring/
>                          (50.11%)
>         44,589,349      cpu_core/topdown-heavy-ops/      #     14.4 %
> tma_retiring             (50.11%)
>        160,987,341      cpu_core/topdown-bad-spec/
>                          (50.11%)
>        778,250,364      cpu_core/topdown-fe-bound/
>                          (50.11%)
>        622,499,674      cpu_core/topdown-be-bound/
>                          (50.11%)
>         90,849,750      cpu_atom/TOPDOWN_RETIRING.ALL/   #      8.1 %
> tma_retiring
>                                                   #     17.2 %
> tma_bad_speculation
>        223,878,243      cpu_atom/CPU_CLK_UNHALTED.CORE/
>        423,068,733      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     37.8 %
> tma_frontend_bound
>        413,413,499      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     36.9 %
> tma_backend_bound
> ```
> so you can see that it is the effect of not grouping the events that
> leads to the cpu_core and cpu_atom split.
> 
> The code that does sorting/fixing/adding of events, primarily to fix
> topdown, is parse_events__sort_events_and_fix_groups:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/parse-events.c?h=perf-tools-next#n2030
> but I've tried to make that code respect the incoming evsel list order
> because if a user specifies an order then they generally expect it to
> be respected (unless invalid or because of topdown events). For
> --metric-only the event order doesn't really matter.
> 
> Anyway, I think trying to fix this is out of scope for this patch
> series, although I agree with you about the readability. The behavior
> here matches old behavior such as:
> ```
> $ perf --version
> perf version 6.16.12
> $ perf stat -M TopdownL1 -a sleep 1
> 
>  Performance counter stats for 'system wide':
> 
>     11,086,754,658      cpu_core/TOPDOWN.SLOTS/          #     27.1 %
> tma_backend_bound
>                                                   #      7.5 %
> tma_bad_speculation
>                                                   #     36.5 %
> tma_frontend_bound
>                                                   #     28.9 %
> tma_retiring
>      3,219,475,010      cpu_core/topdown-retiring/
>        820,655,931      cpu_core/topdown-bad-spec/
>        418,883,912      cpu_core/topdown-heavy-ops/
>      4,082,884,459      cpu_core/topdown-fe-bound/
>      3,012,532,414      cpu_core/topdown-be-bound/
>      1,030,171,196      cpu_atom/TOPDOWN_RETIRING.ALL/   #     17.4 %
> tma_retiring
>                                                   #     16.5 %
> tma_bad_speculation
>      1,185,093,601      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     29.8 %
> tma_backend_bound
>                                                   #     36.4 %
> tma_frontend_bound
>      2,154,914,153      cpu_atom/TOPDOWN_FE_BOUND.ALL/
>         14,988,684      cpu_core/INT_MISC.UOP_DROPPING/
>      1,763,486,868      cpu_atom/TOPDOWN_BE_BOUND.ALL/
> 
>        1.004103365 seconds time elapsed
> ```
> ie the cpu_core and cpu_atom mixing of events isn't a regression
> introduced here. There isn't a simple fix for the ordering, as we
> don't want to mess up the non-metric cases. I'm happy if you think
> things can be otherwise to make a change.

Agreed and it should be handled in a separate patch (series).  Let's fix
problems one at a time.

Thanks,
Namhyung

Re: [PATCH v3 01/18] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Mi, Dapeng 2 months, 4 weeks ago
On 11/12/2025 3:05 AM, Namhyung Kim wrote:
> On Tue, Nov 11, 2025 at 09:20:30AM -0800, Ian Rogers wrote:
>> On Tue, Nov 11, 2025 at 12:15 AM Mi, Dapeng <dapeng1.mi@linux.intel.com> wrote:
>>>
>>> On 11/11/2025 12:04 PM, Ian Rogers wrote:
>>>> Rather than using the first evsel in the matched events, try to find
>>>> the least shared non-tool evsel. The aim is to pick the first evsel
>>>> that typifies the metric within the list of metrics.
>>>>
>>>> This addresses an issue where Default metric group metrics may lose
>>>> their counter value due to how the stat displaying hides counters for
>>>> default event/metric output.
>>>>
>>>> For a metricgroup like TopdownL1 on an Intel Alderlake the change is,
>>>> before there are 4 events with metrics:
>>>> ```
>>>> $ perf stat -M topdownL1 -a sleep 1
>>>>
>>>>  Performance counter stats for 'system wide':
>>>>
>>>>      7,782,334,296      cpu_core/TOPDOWN.SLOTS/          #     10.4 %  tma_bad_speculation
>>>>                                                   #     19.7 %  tma_frontend_bound
>>>>      2,668,927,977      cpu_core/topdown-retiring/       #     35.7 %  tma_backend_bound
>>>>                                                   #     34.1 %  tma_retiring
>>>>        803,623,987      cpu_core/topdown-bad-spec/
>>>>        167,514,386      cpu_core/topdown-heavy-ops/
>>>>      1,555,265,776      cpu_core/topdown-fe-bound/
>>>>      2,792,733,013      cpu_core/topdown-be-bound/
>>>>        279,769,310      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.2 %  tma_retiring
>>>>                                                   #     15.1 %  tma_bad_speculation
>>>>        457,917,232      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     38.4 %  tma_backend_bound
>>>>                                                   #     34.2 %  tma_frontend_bound
>>>>        783,519,226      cpu_atom/TOPDOWN_FE_BOUND.ALL/
>>>>         10,790,192      cpu_core/INT_MISC.UOP_DROPPING/
>>>>        879,845,633      cpu_atom/TOPDOWN_BE_BOUND.ALL/
>>>> ```
>>>>
>>>> After there are 6 events with metrics:
>>>> ```
>>>> $ perf stat -M topdownL1 -a sleep 1
>>>>
>>>>  Performance counter stats for 'system wide':
>>>>
>>>>      2,377,551,258      cpu_core/TOPDOWN.SLOTS/          #      7.9 %  tma_bad_speculation
>>>>                                                   #     36.4 %  tma_frontend_bound
>>>>        480,791,142      cpu_core/topdown-retiring/       #     35.5 %  tma_backend_bound
>>>>        186,323,991      cpu_core/topdown-bad-spec/
>>>>         65,070,590      cpu_core/topdown-heavy-ops/      #     20.1 %  tma_retiring
>>>>        871,733,444      cpu_core/topdown-fe-bound/
>>>>        848,286,598      cpu_core/topdown-be-bound/
>>>>        260,936,456      cpu_atom/TOPDOWN_RETIRING.ALL/   #     12.4 %  tma_retiring
>>>>                                                   #     17.6 %  tma_bad_speculation
>>>>        419,576,513      cpu_atom/CPU_CLK_UNHALTED.CORE/
>>>>        797,132,597      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     38.0 %  tma_frontend_bound
>>>>          3,055,447      cpu_core/INT_MISC.UOP_DROPPING/
>>>>        671,014,164      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     32.0 %  tma_backend_bound
>>>> ```
>>> It looks the output of cpu_core and cpu_atom events are mixed together,
>>> like the "cpu_core/INT_MISC.UOP_DROPPING/". Could we resort the events and
>>> separate the cpu_core and cpu_atom events output? It would make the output
>>> more read-friendly. Thanks.
>> So the metrics are tagged as to not group the events:
>> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json?h=perf-tools-next#n117
>> Running with each metric causes the output to be:
>> ```
>> $ perf stat -M tma_bad_speculation,tma_backend_bound,tma_frontend_bound,tma_retiring
>> -a sleep 1
>>
>>  Performance counter stats for 'system wide':
>>
>>      1,615,145,897      cpu_core/TOPDOWN.SLOTS/          #      8.1 %
>> tma_bad_speculation
>>                                                   #     42.5 %
>> tma_frontend_bound       (49.89%)
>>        243,037,087      cpu_core/topdown-retiring/       #     34.5 %
>> tma_backend_bound        (49.89%)
>>        129,341,306      cpu_core/topdown-bad-spec/
>>                          (49.89%)
>>          2,679,894      cpu_core/INT_MISC.UOP_DROPPING/
>>                          (49.89%)
>>        696,940,348      cpu_core/topdown-fe-bound/
>>                          (49.89%)
>>        563,319,011      cpu_core/topdown-be-bound/
>>                          (49.89%)
>>      1,795,034,847      cpu_core/slots/
>>                          (50.11%)
>>        262,140,961      cpu_core/topdown-retiring/
>>                          (50.11%)
>>         44,589,349      cpu_core/topdown-heavy-ops/      #     14.4 %
>> tma_retiring             (50.11%)
>>        160,987,341      cpu_core/topdown-bad-spec/
>>                          (50.11%)
>>        778,250,364      cpu_core/topdown-fe-bound/
>>                          (50.11%)
>>        622,499,674      cpu_core/topdown-be-bound/
>>                          (50.11%)
>>         90,849,750      cpu_atom/TOPDOWN_RETIRING.ALL/   #      8.1 %
>> tma_retiring
>>                                                   #     17.2 %
>> tma_bad_speculation
>>        223,878,243      cpu_atom/CPU_CLK_UNHALTED.CORE/
>>        423,068,733      cpu_atom/TOPDOWN_FE_BOUND.ALL/   #     37.8 %
>> tma_frontend_bound
>>        413,413,499      cpu_atom/TOPDOWN_BE_BOUND.ALL/   #     36.9 %
>> tma_backend_bound
>> ```
>> so you can see that it is the effect of not grouping the events that
>> leads to the cpu_core and cpu_atom split.
>>
>> The code that does sorting/fixing/adding of events, primarily to fix
>> topdown, is parse_events__sort_events_and_fix_groups:
>> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/parse-events.c?h=perf-tools-next#n2030
>> but I've tried to make that code respect the incoming evsel list order
>> because if a user specifies an order then they generally expect it to
>> be respected (unless invalid or because of topdown events). For
>> --metric-only the event order doesn't really matter.
>>
>> Anyway, I think trying to fix this is out of scope for this patch
>> series, although I agree with you about the readability. The behavior
>> here matches old behavior such as:
>> ```
>> $ perf --version
>> perf version 6.16.12
>> $ perf stat -M TopdownL1 -a sleep 1
>>
>>  Performance counter stats for 'system wide':
>>
>>     11,086,754,658      cpu_core/TOPDOWN.SLOTS/          #     27.1 %
>> tma_backend_bound
>>                                                   #      7.5 %
>> tma_bad_speculation
>>                                                   #     36.5 %
>> tma_frontend_bound
>>                                                   #     28.9 %
>> tma_retiring
>>      3,219,475,010      cpu_core/topdown-retiring/
>>        820,655,931      cpu_core/topdown-bad-spec/
>>        418,883,912      cpu_core/topdown-heavy-ops/
>>      4,082,884,459      cpu_core/topdown-fe-bound/
>>      3,012,532,414      cpu_core/topdown-be-bound/
>>      1,030,171,196      cpu_atom/TOPDOWN_RETIRING.ALL/   #     17.4 %
>> tma_retiring
>>                                                   #     16.5 %
>> tma_bad_speculation
>>      1,185,093,601      cpu_atom/CPU_CLK_UNHALTED.CORE/  #     29.8 %
>> tma_backend_bound
>>                                                   #     36.4 %
>> tma_frontend_bound
>>      2,154,914,153      cpu_atom/TOPDOWN_FE_BOUND.ALL/
>>         14,988,684      cpu_core/INT_MISC.UOP_DROPPING/
>>      1,763,486,868      cpu_atom/TOPDOWN_BE_BOUND.ALL/
>>
>>        1.004103365 seconds time elapsed
>> ```
>> ie the cpu_core and cpu_atom mixing of events isn't a regression
>> introduced here. There isn't a simple fix for the ordering, as we
>> don't want to mess up the non-metric cases. I'm happy if you think
>> things can be otherwise to make a change.
> Agreed and it should be handled in a separate patch (series).  Let's fix
> problems one at a time.

It makes sense. Thanks.


>
> Thanks,
> Namhyung
>
>