[PATCH v1 05/22] perf metricgroup: Add care to picking the evsel for displaying a metric

Posted by Ian Rogers 3 months, 2 weeks ago
Rather than using the first evsel in the matched events, try to find
the least shared non-tool evsel. The aim is to pick the first evsel
that typifies the metric within the list of metrics.

This addresses an issue where Default metric group metrics may lose
their counter value due to how the stat display code hides counters for
default event/metric output.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/metricgroup.c | 48 ++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 48936e517803..76092ee26761 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1323,6 +1323,51 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu,
 	return ret;
 }
 
+/* How many times will a given evsel be used in a set of metrics? */
+static int count_uses(struct list_head *metric_list, struct evsel *evsel)
+{
+	const char *metric_id = evsel__metric_id(evsel);
+	struct metric *m;
+	int uses = 0;
+
+	list_for_each_entry(m, metric_list, nd) {
+		if (hashmap__find(m->pctx->ids, metric_id, NULL))
+			uses++;
+	}
+	return uses;
+}
+
+/*
+ * Select the evsel that stat-display will use to trigger shadow/metric
+ * printing. Pick the least shared non-tool evsel, encouraging metrics to be
+ * with a hardware counter that is specific to them.
+ */
+static struct evsel *pick_display_evsel(struct list_head *metric_list,
+					struct evsel **metric_events)
+{
+	struct evsel *selected = metric_events[0];
+	size_t selected_uses;
+	bool selected_is_tool;
+
+	if (!selected)
+		return NULL;
+
+	selected_uses = count_uses(metric_list, selected);
+	selected_is_tool = evsel__is_tool(selected);
+	for (int i = 1; metric_events[i]; i++) {
+		struct evsel *candidate = metric_events[i];
+		size_t candidate_uses = count_uses(metric_list, candidate);
+
+		if ((selected_is_tool && !evsel__is_tool(candidate)) ||
+		    (candidate_uses < selected_uses)) {
+			selected = candidate;
+			selected_uses = candidate_uses;
+			selected_is_tool = evsel__is_tool(selected);
+		}
+	}
+	return selected;
+}
+
 static int parse_groups(struct evlist *perf_evlist,
 			const char *pmu, const char *str,
 			bool metric_no_group,
@@ -1430,7 +1475,8 @@ static int parse_groups(struct evlist *perf_evlist,
 			goto out;
 		}
 
-		me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0],
+		me = metricgroup__lookup(&perf_evlist->metric_events,
+					 pick_display_evsel(&metric_list, metric_events),
 					 /*create=*/true);
 
 		expr = malloc(sizeof(struct metric_expr));
-- 
2.51.1.821.gb6fe4d2222-goog
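
As an aside on the heuristic: the snippet below is a standalone toy
illustration of the same preference order used by pick_display_evsel()
above (prefer a non-tool event, then the event referenced by the fewest
metrics). It is not perf code; struct toy_event, its fields and the
example events and use counts are invented for the illustration.

  #include <stdbool.h>
  #include <stdio.h>

  struct toy_event {
          const char *name;
          bool is_tool;   /* stands in for evsel__is_tool(), e.g. duration_time */
          int uses;       /* stands in for count_uses(): metrics referencing the event */
  };

  /* Mirrors the condition in pick_display_evsel(): non-tool first, then least shared. */
  static const struct toy_event *pick(const struct toy_event *ev, int n)
  {
          const struct toy_event *sel = &ev[0];

          for (int i = 1; i < n; i++) {
                  if ((sel->is_tool && !ev[i].is_tool) || ev[i].uses < sel->uses)
                          sel = &ev[i];
          }
          return sel;
  }

  int main(void)
  {
          const struct toy_event ev[] = {
                  { "duration_time",         true,  5 },
                  { "TOPDOWN.SLOTS",         false, 4 },
                  { "INT_MISC.CLEARS_COUNT", false, 1 },
          };

          /* Prints INT_MISC.CLEARS_COUNT: non-tool and used by only one metric. */
          printf("%s\n", pick(ev, 3)->name);
          return 0;
  }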
Re: [PATCH v1 05/22] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Namhyung Kim 3 months ago
On Fri, Oct 24, 2025 at 10:58:40AM -0700, Ian Rogers wrote:
> Rather than using the first evsel in the matched events, try to find
> the least shared non-tool evsel. The aim is to pick the first evsel
> that typifies the metric within the list of metrics.
> 
> This addresses an issue where Default metric group metrics may lose
> their counter value due to how the stat displaying hides counters for
> default event/metric output.

Do you have a command line example to show impact of this change?

Thanks,
Namhyung

Re: [PATCH v1 05/22] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Ian Rogers 3 months ago
On Mon, Nov 3, 2025 at 8:52 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Fri, Oct 24, 2025 at 10:58:40AM -0700, Ian Rogers wrote:
> > Rather than using the first evsel in the matched events, try to find
> > the least shared non-tool evsel. The aim is to pick the first evsel
> > that typifies the metric within the list of metrics.
> >
> > This addresses an issue where Default metric group metrics may lose
> > their counter value due to how the stat displaying hides counters for
> > default event/metric output.
>
> Do you have a command line example to show impact of this change?

You can just run a Topdown metricgroup on Intel to see differences,
but they are minor. The main impact is on the Default legacy metrics
as those have a counter then a metric, but without this change you get
everything grouped on the cpu-clock event and the formatting gets
broken. As --metric-only is popular when looking at a group of events,
and the Default legacy metrics are added in subsequent changes, it
didn't seem right to include the output here (it would either show
broken output staying somewhat broken, or output from later patches).

Thanks,
Ian

Re: [PATCH v1 05/22] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Namhyung Kim 3 months ago
On Mon, Nov 03, 2025 at 09:28:44PM -0800, Ian Rogers wrote:
> On Mon, Nov 3, 2025 at 8:52 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Fri, Oct 24, 2025 at 10:58:40AM -0700, Ian Rogers wrote:
> > > Rather than using the first evsel in the matched events, try to find
> > > the least shared non-tool evsel. The aim is to pick the first evsel
> > > that typifies the metric within the list of metrics.
> > >
> > > This addresses an issue where Default metric group metrics may lose
> > > their counter value due to how the stat displaying hides counters for
> > > default event/metric output.
> >
> > Do you have a command line example to show impact of this change?
> 
> You can just run a Topdown metricgroup on Intel to see differences,

Ok, before this change.

  $ perf stat -M topdownL1 true
  
   Performance counter stats for 'true':
  
           7,754,275      TOPDOWN.SLOTS                    #     37.1 %  tma_backend_bound      
                                                    #     38.7 %  tma_frontend_bound     
                                                    #      8.8 %  tma_bad_speculation    
                                                    #     15.3 %  tma_retiring           
           1,185,947      topdown-retiring                                                      
           3,010,483      topdown-fe-bound                                                      
           2,828,029      topdown-be-bound                                                      
             729,814      topdown-bad-spec                                                      
               9,987      INT_MISC.CLEARS_COUNT                                                 
             221,405      IDQ.MS_UOPS                                                           
               6,352      INT_MISC.UOP_DROPPING                                                 
           1,212,644      UOPS_RETIRED.SLOTS                                                    
             119,895      UOPS_DECODED.DEC0                                                     
              60,975      cpu/UOPS_DECODED.DEC0,cmask=1/                                        
           1,639,442      UOPS_ISSUED.ANY                                                       
             820,982      IDQ.MITE_UOPS                                                         
  
         0.001172956 seconds time elapsed
  
         0.001269000 seconds user
         0.000000000 seconds sys


And with this change, it does look better.

  $ perf stat -M topdownL1 true
  
   Performance counter stats for 'true':
  
           7,977,430      TOPDOWN.SLOTS                                                         
           1,188,793      topdown-retiring                                                      
           3,159,687      topdown-fe-bound                                                      
           2,940,699      topdown-be-bound                                                      
             688,248      topdown-bad-spec                                                      
               9,749      INT_MISC.CLEARS_COUNT            #     37.5 %  tma_backend_bound      
                                                    #      8.1 %  tma_bad_speculation    
             219,145      IDQ.MS_UOPS                      #     14.9 %  tma_retiring           
               6,188      INT_MISC.UOP_DROPPING            #     39.5 %  tma_frontend_bound     
           1,205,712      UOPS_RETIRED.SLOTS                                                    
             117,505      UOPS_DECODED.DEC0                                                     
              59,891      cpu/UOPS_DECODED.DEC0,cmask=1/                                        
           1,625,232      UOPS_ISSUED.ANY                                                       
             805,560      IDQ.MITE_UOPS                                                         
  
         0.001629344 seconds time elapsed
  
         0.001672000 seconds user
         0.000000000 seconds sys

> but they are minor. The main impact is on the Default legacy metrics
> as those have a counter then a metric, but without this change you get
> everything grouped on the cpu-clock event and the formatting gets broken.

Do you mean with other changes in this series?  I don't see any
differences in the output just after this patch..

Before:

  $ perf stat -a true
  
   Performance counter stats for 'system wide':
  
          19,078,719      cpu-clock                        #    7.256 CPUs utilized             
                  94      context-switches                 #    4.927 K/sec                     
                  14      cpu-migrations                   #  733.802 /sec                      
                  61      page-faults                      #    3.197 K/sec                     
          43,304,957      instructions                     #    1.10  insn per cycle            
          39,281,107      cycles                           #    2.059 GHz                       
           5,012,071      branches                         #  262.705 M/sec                     
             128,358      branch-misses                    #    2.56% of all branches           
   #     24.4 %  tma_retiring           
   #     33.7 %  tma_backend_bound      
                                                    #      5.9 %  tma_bad_speculation    
   #     36.0 %  tma_frontend_bound     
  
         0.002629534 seconds time elapsed
  
After:
  
  $ perf stat -a true
  
   Performance counter stats for 'system wide':
  
           6,201,661      cpu-clock                        #    3.692 CPUs utilized             
                  24      context-switches                 #    3.870 K/sec                     
                   7      cpu-migrations                   #    1.129 K/sec                     
                  60      page-faults                      #    9.675 K/sec                     
          11,458,681      instructions                     #    1.07  insn per cycle            
          10,704,978      cycles                           #    1.726 GHz                       
           2,457,704      branches                         #  396.298 M/sec                     
              54,553      branch-misses                    #    2.22% of all branches           
   #     21.4 %  tma_retiring           
   #     36.1 %  tma_backend_bound      
                                                    #     10.2 %  tma_bad_speculation    
   #     32.3 %  tma_frontend_bound     
  
         0.001679679 seconds time elapsed
  
Thanks,
Namhyung


Re: [PATCH v1 05/22] perf metricgroup: Add care to picking the evsel for displaying a metric
Posted by Ian Rogers 3 months ago
On Wed, Nov 5, 2025 at 10:03 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Mon, Nov 03, 2025 at 09:28:44PM -0800, Ian Rogers wrote:
> > On Mon, Nov 3, 2025 at 8:52 PM Namhyung Kim <namhyung@kernel.org> wrote:
> > >
> > > On Fri, Oct 24, 2025 at 10:58:40AM -0700, Ian Rogers wrote:
> > > > Rather than using the first evsel in the matched events, try to find
> > > > the least shared non-tool evsel. The aim is to pick the first evsel
> > > > that typifies the metric within the list of metrics.
> > > >
> > > > This addresses an issue where Default metric group metrics may lose
> > > > their counter value due to how the stat displaying hides counters for
> > > > default event/metric output.
> > >
> > > Do you have a command line example to show impact of this change?
> >
> > You can just run a Topdown metricgroup on Intel to see differences,
>
> Ok, before this change.
>
>   $ perf stat -M topdownL1 true
>
>    Performance counter stats for 'true':
>
>            7,754,275      TOPDOWN.SLOTS                    #     37.1 %  tma_backend_bound
>                                                     #     38.7 %  tma_frontend_bound
>                                                     #      8.8 %  tma_bad_speculation
>                                                     #     15.3 %  tma_retiring
>            1,185,947      topdown-retiring
>            3,010,483      topdown-fe-bound
>            2,828,029      topdown-be-bound
>              729,814      topdown-bad-spec
>                9,987      INT_MISC.CLEARS_COUNT
>              221,405      IDQ.MS_UOPS
>                6,352      INT_MISC.UOP_DROPPING
>            1,212,644      UOPS_RETIRED.SLOTS
>              119,895      UOPS_DECODED.DEC0
>               60,975      cpu/UOPS_DECODED.DEC0,cmask=1/
>            1,639,442      UOPS_ISSUED.ANY
>              820,982      IDQ.MITE_UOPS
>
>          0.001172956 seconds time elapsed
>
>          0.001269000 seconds user
>          0.000000000 seconds sys
>
>
> And with this change, it does look better.
>
>   $ perf stat -M topdownL1 true
>
>    Performance counter stats for 'true':
>
>            7,977,430      TOPDOWN.SLOTS
>            1,188,793      topdown-retiring
>            3,159,687      topdown-fe-bound
>            2,940,699      topdown-be-bound
>              688,248      topdown-bad-spec
>                9,749      INT_MISC.CLEARS_COUNT            #     37.5 %  tma_backend_bound
>                                                     #      8.1 %  tma_bad_speculation
>              219,145      IDQ.MS_UOPS                      #     14.9 %  tma_retiring
>                6,188      INT_MISC.UOP_DROPPING            #     39.5 %  tma_frontend_bound
>            1,205,712      UOPS_RETIRED.SLOTS
>              117,505      UOPS_DECODED.DEC0
>               59,891      cpu/UOPS_DECODED.DEC0,cmask=1/
>            1,625,232      UOPS_ISSUED.ANY
>              805,560      IDQ.MITE_UOPS
>
>          0.001629344 seconds time elapsed
>
>          0.001672000 seconds user
>          0.000000000 seconds sys
>
> > but they are minor. The main impact is on the Default legacy metrics
> > as those have a counter then a metric, but without this change you get
> > everything grouped on the cpu-clock event and the formatting gets broken.
>
> Do you mean with other changes in this series?  I don't see any
> differences in the output just after this patch..
>
> Before:
>
>   $ perf stat -a true
>
>    Performance counter stats for 'system wide':
>
>           19,078,719      cpu-clock                        #    7.256 CPUs utilized
>                   94      context-switches                 #    4.927 K/sec
>                   14      cpu-migrations                   #  733.802 /sec
>                   61      page-faults                      #    3.197 K/sec
>           43,304,957      instructions                     #    1.10  insn per cycle
>           39,281,107      cycles                           #    2.059 GHz
>            5,012,071      branches                         #  262.705 M/sec
>              128,358      branch-misses                    #    2.56% of all branches
>    #     24.4 %  tma_retiring
>    #     33.7 %  tma_backend_bound
>                                                     #      5.9 %  tma_bad_speculation
>    #     36.0 %  tma_frontend_bound
>
>          0.002629534 seconds time elapsed
>
> After:
>
>   $ perf stat -a true
>
>    Performance counter stats for 'system wide':
>
>            6,201,661      cpu-clock                        #    3.692 CPUs utilized
>                   24      context-switches                 #    3.870 K/sec
>                    7      cpu-migrations                   #    1.129 K/sec
>                   60      page-faults                      #    9.675 K/sec
>           11,458,681      instructions                     #    1.07  insn per cycle
>           10,704,978      cycles                           #    1.726 GHz
>            2,457,704      branches                         #  396.298 M/sec
>               54,553      branch-misses                    #    2.22% of all branches
>    #     21.4 %  tma_retiring
>    #     36.1 %  tma_backend_bound
>                                                     #     10.2 %  tma_bad_speculation
>    #     32.3 %  tma_frontend_bound
>
>          0.001679679 seconds time elapsed

These are the hardcoded metrics that aren't impacted by my changes to
the json metrics' behavior. Patch 8 will add json for the legacy
metrics.

Thanks,
Ian
