[PATCH 2/3] perf list: Collapse similar events across PMUs

James Clark posted 3 patches 11 months, 1 week ago
[PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by James Clark 11 months, 1 week ago
Instead of showing multiple line items with the same event name and
description, show a single line and concatenate all PMUs that this
event can belong to.

Don't do it for json output. Machine readable output doesn't need to be
minimized, and changing the "Unit" field to a list type would break
backwards compatibility.

Before:
 $ perf list -v
 ...
 br_indirect_spec
       [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
 br_indirect_spec
       [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]

After:

 $ perf list -v
 ...
 br_indirect_spec
       [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]

Signed-off-by: James Clark <james.clark@linaro.org>
---
 tools/perf/builtin-list.c      |  2 ++
 tools/perf/util/pmus.c         | 75 +++++++++++++++++++++++++++++++++++++-----
 tools/perf/util/print-events.h |  1 +
 3 files changed, 70 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index fed482adb039..aacd7beae2a0 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -516,6 +516,7 @@ int cmd_list(int argc, const char **argv)
 		.print_event = default_print_event,
 		.print_metric = default_print_metric,
 		.skip_duplicate_pmus = default_skip_duplicate_pmus,
+		.collapse_events = true
 	};
 	const char *cputype = NULL;
 	const char *unit_name = NULL;
@@ -574,6 +575,7 @@ int cmd_list(int argc, const char **argv)
 			.print_event = json_print_event,
 			.print_metric = json_print_metric,
 			.skip_duplicate_pmus = json_skip_duplicate_pmus,
+			.collapse_events = false
 		};
 		ps = &json_ps;
 	} else {
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index 4d60bac2d2b9..cb1b14ade25b 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -453,17 +453,50 @@ static int cmp_sevent(const void *a, const void *b)
 	/* Order by PMU name. */
 	if (as->pmu == bs->pmu)
 		return 0;
-	return strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
+	ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
+	if (ret)
+		return ret;
+
+	/* Order by remaining displayed fields for purposes of deduplication later */
+	ret = strcmp(as->scale_unit ?: "", bs->scale_unit ?: "");
+	if (ret)
+		return ret;
+	ret = !!as->deprecated - !!bs->deprecated;
+	if (ret)
+		return ret;
+	ret = strcmp(as->desc ?: "", bs->desc ?: "");
+	if (ret)
+		return ret;
+	return strcmp(as->long_desc ?: "", bs->long_desc ?: "");
 }
 
-static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
+enum dup_type {
+	UNIQUE,
+	DUPLICATE,
+	SAME_TEXT
+};
+
+static enum dup_type pmu_alias_duplicate_type(struct sevent *a, struct sevent *b)
 {
 	/* Different names -> never duplicates */
 	if (strcmp(a->name ?: "//", b->name ?: "//"))
-		return false;
+		return UNIQUE;
+
+	/* Duplicate PMU name and event name -> hide completely */
+	if (strcmp(a->pmu_name, b->pmu_name) == 0)
+		return DUPLICATE;
+
+	/* Any other different display text -> not duplicate */
+	if (strcmp(a->topic ?: "", b->topic ?: "") ||
+	    strcmp(a->scale_unit ?: "", b->scale_unit ?: "") ||
+	    a->deprecated != b->deprecated ||
+	    strcmp(a->desc ?: "", b->desc ?: "") ||
+	    strcmp(a->long_desc ?: "", b->long_desc ?: "")) {
+		return UNIQUE;
+	}
 
-	/* Don't remove duplicates for different PMUs */
-	return strcmp(a->pmu_name, b->pmu_name) == 0;
+	/* Same display text but different PMU -> collapse */
+	return SAME_TEXT;
 }
 
 struct events_callback_state {
@@ -501,6 +534,21 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
 	return 0;
 }
 
+static void concat_pmu_names(char *pmu_names, size_t size, const char *a, const char *b)
+{
+	size_t len = strlen(pmu_names);
+	size_t added;
+
+	if (len)
+		added = snprintf(pmu_names + len, size - len, ",%s", b);
+	else
+		added = snprintf(pmu_names, size, "%s,%s", a, b);
+
+	/* Truncate with ... */
+	if (added > 0 && added + len >= size)
+		sprintf(pmu_names + size - 4, "...");
+}
+
 void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
 {
 	struct perf_pmu *pmu;
@@ -510,6 +558,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
 	struct events_callback_state state;
 	bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
 	struct perf_pmu *(*scan_fn)(struct perf_pmu *);
+	char pmu_names[128] = {0};
 
 	if (skip_duplicate_pmus)
 		scan_fn = perf_pmus__scan_skip_duplicates;
@@ -539,12 +588,21 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
 	qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
 	for (int j = 0; j < len; j++) {
 		/* Skip duplicates */
-		if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
-			goto free;
+		if (j < len - 1) {
+			enum dup_type dt = pmu_alias_duplicate_type(&aliases[j], &aliases[j + 1]);
+
+			if (dt == DUPLICATE) {
+				goto free;
+			} else if (print_cb->collapse_events && dt == SAME_TEXT) {
+				concat_pmu_names(pmu_names, sizeof(pmu_names),
+						 aliases[j].pmu_name, aliases[j+1].pmu_name);
+				goto free;
+			}
+		}
 
 		print_cb->print_event(print_state,
 				aliases[j].topic,
-				aliases[j].pmu_name,
+				pmu_names[0] ? pmu_names : aliases[j].pmu_name,
 				aliases[j].name,
 				aliases[j].alias,
 				aliases[j].scale_unit,
@@ -553,6 +611,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
 				aliases[j].desc,
 				aliases[j].long_desc,
 				aliases[j].encoding_desc);
+		pmu_names[0] = '\0';
 free:
 		zfree(&aliases[j].name);
 		zfree(&aliases[j].alias);
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index 445efa1636c1..e91f9f830a2a 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -27,6 +27,7 @@ struct print_callbacks {
 			const char *threshold,
 			const char *unit);
 	bool (*skip_duplicate_pmus)(void *print_state);
+	bool collapse_events;
 };
 
 /** Print all events, the default when no options are specified. */

-- 
2.34.1
Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by Ian Rogers 11 months, 1 week ago
On Tue, Mar 4, 2025 at 5:50 AM James Clark <james.clark@linaro.org> wrote:
>
> Instead of showing multiple line items with the same event name and
> description, show a single line and concatenate all PMUs that this
> event can belong to.
>
> Don't do it for json output. Machine readable output doesn't need to be
> minimized, and changing the "Unit" field to a list type would break
> backwards compatibility.
>
> Before:
>  $ perf list -v
>  ...
>  br_indirect_spec
>        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
>  br_indirect_spec
>        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
>
> After:
>
>  $ perf list -v
>  ...
>  br_indirect_spec
>        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]
>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
>  tools/perf/builtin-list.c      |  2 ++
>  tools/perf/util/pmus.c         | 75 +++++++++++++++++++++++++++++++++++++-----
>  tools/perf/util/print-events.h |  1 +
>  3 files changed, 70 insertions(+), 8 deletions(-)
>
> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
> index fed482adb039..aacd7beae2a0 100644
> --- a/tools/perf/builtin-list.c
> +++ b/tools/perf/builtin-list.c
> @@ -516,6 +516,7 @@ int cmd_list(int argc, const char **argv)
>                 .print_event = default_print_event,
>                 .print_metric = default_print_metric,
>                 .skip_duplicate_pmus = default_skip_duplicate_pmus,
> +               .collapse_events = true

So collapse_events is put in the callbacks but isn't a callback. I
think skipping duplicates and collapsing are the same thing, I'd
prefer it if there weren't two terms for the same thing. If you prefer
collapsing as a name then default_skip_duplicate_pmus can be
default_collapse_pmus.

>         };
>         const char *cputype = NULL;
>         const char *unit_name = NULL;
> @@ -574,6 +575,7 @@ int cmd_list(int argc, const char **argv)
>                         .print_event = json_print_event,
>                         .print_metric = json_print_metric,
>                         .skip_duplicate_pmus = json_skip_duplicate_pmus,
> +                       .collapse_events = false
>                 };
>                 ps = &json_ps;
>         } else {
> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
> index 4d60bac2d2b9..cb1b14ade25b 100644
> --- a/tools/perf/util/pmus.c
> +++ b/tools/perf/util/pmus.c
> @@ -453,17 +453,50 @@ static int cmp_sevent(const void *a, const void *b)
>         /* Order by PMU name. */
>         if (as->pmu == bs->pmu)
>                 return 0;
> -       return strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
> +       ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
> +       if (ret)
> +               return ret;
> +
> +       /* Order by remaining displayed fields for purposes of deduplication later */
> +       ret = strcmp(as->scale_unit ?: "", bs->scale_unit ?: "");
> +       if (ret)
> +               return ret;
> +       ret = !!as->deprecated - !!bs->deprecated;
> +       if (ret)
> +               return ret;
> +       ret = strcmp(as->desc ?: "", bs->desc ?: "");
> +       if (ret)
> +               return ret;
> +       return strcmp(as->long_desc ?: "", bs->long_desc ?: "");
>  }
>
> -static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
> +enum dup_type {
> +       UNIQUE,
> +       DUPLICATE,
> +       SAME_TEXT
> +};
> +
> +static enum dup_type pmu_alias_duplicate_type(struct sevent *a, struct sevent *b)
>  {
>         /* Different names -> never duplicates */
>         if (strcmp(a->name ?: "//", b->name ?: "//"))
> -               return false;
> +               return UNIQUE;
> +
> +       /* Duplicate PMU name and event name -> hide completely */
> +       if (strcmp(a->pmu_name, b->pmu_name) == 0)
> +               return DUPLICATE;
> +
> +       /* Any other different display text -> not duplicate */
> +       if (strcmp(a->topic ?: "", b->topic ?: "") ||
> +           strcmp(a->scale_unit ?: "", b->scale_unit ?: "") ||
> +           a->deprecated != b->deprecated ||
> +           strcmp(a->desc ?: "", b->desc ?: "") ||
> +           strcmp(a->long_desc ?: "", b->long_desc ?: "")) {
> +               return UNIQUE;
> +       }
>
> -       /* Don't remove duplicates for different PMUs */
> -       return strcmp(a->pmu_name, b->pmu_name) == 0;
> +       /* Same display text but different PMU -> collapse */
> +       return SAME_TEXT;
>  }
>
>  struct events_callback_state {
> @@ -501,6 +534,21 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
>         return 0;
>  }
>
> +static void concat_pmu_names(char *pmu_names, size_t size, const char *a, const char *b)
> +{
> +       size_t len = strlen(pmu_names);
> +       size_t added;
> +
> +       if (len)
> +               added = snprintf(pmu_names + len, size - len, ",%s", b);
> +       else
> +               added = snprintf(pmu_names, size, "%s,%s", a, b);
> +
> +       /* Truncate with ... */
> +       if (added > 0 && added + len >= size)
> +               sprintf(pmu_names + size - 4, "...");
> +}
> +
>  void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
>  {
>         struct perf_pmu *pmu;
> @@ -510,6 +558,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>         struct events_callback_state state;
>         bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
>         struct perf_pmu *(*scan_fn)(struct perf_pmu *);
> +       char pmu_names[128] = {0};
>
>         if (skip_duplicate_pmus)
>                 scan_fn = perf_pmus__scan_skip_duplicates;
> @@ -539,12 +588,21 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>         qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
>         for (int j = 0; j < len; j++) {
>                 /* Skip duplicates */
> -               if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
> -                       goto free;
> +               if (j < len - 1) {
> +                       enum dup_type dt = pmu_alias_duplicate_type(&aliases[j], &aliases[j + 1]);
> +
> +                       if (dt == DUPLICATE) {
> +                               goto free;
> +                       } else if (print_cb->collapse_events && dt == SAME_TEXT) {
> +                               concat_pmu_names(pmu_names, sizeof(pmu_names),
> +                                                aliases[j].pmu_name, aliases[j+1].pmu_name);
> +                               goto free;
> +                       }
> +               }

I think a label called 'free' is a little unfortunate given the
function called free.
When I did things with sevent I was bringing over previous `perf list`
code mainly so that the perf list output before and after the changes
was identical. I wonder if this logic would be better placed in the
callback like default_print_event which already maintains state
(last_topic) to optionally print different things. This may better
encapsulate the behavior rather than the logic being in the PMU code.

>
>                 print_cb->print_event(print_state,
>                                 aliases[j].topic,
> -                               aliases[j].pmu_name,
> +                               pmu_names[0] ? pmu_names : aliases[j].pmu_name,
>                                 aliases[j].name,
>                                 aliases[j].alias,
>                                 aliases[j].scale_unit,
> @@ -553,6 +611,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>                                 aliases[j].desc,
>                                 aliases[j].long_desc,
>                                 aliases[j].encoding_desc);

The encoding_desc will have a PMU with the suffix removed as per
skipping duplicates:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/pmu.c?h=perf-tools-next#n1954
So I think something like:
```
  br_mis_pred_retired
       [Instruction architecturally executed,mispredicted branch. Unit:
        armv9_cortex_a510,armv9_cortex_a710]
```
Would have an encoding of `armv9_cortex_a510/.../` without the a710
encoding being present. That said, I'm not sure anyone cares :-)

Thanks,
Ian

> +               pmu_names[0] = '\0';
>  free:
>                 zfree(&aliases[j].name);
>                 zfree(&aliases[j].alias);
> diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
> index 445efa1636c1..e91f9f830a2a 100644
> --- a/tools/perf/util/print-events.h
> +++ b/tools/perf/util/print-events.h
> @@ -27,6 +27,7 @@ struct print_callbacks {
>                         const char *threshold,
>                         const char *unit);
>         bool (*skip_duplicate_pmus)(void *print_state);
> +       bool collapse_events;
>  };
>
>  /** Print all events, the default when no options are specified. */
>
> --
> 2.34.1
>
Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by James Clark 11 months, 1 week ago

On 05/03/2025 9:40 pm, Ian Rogers wrote:
> On Tue, Mar 4, 2025 at 5:50 AM James Clark <james.clark@linaro.org> wrote:
>>
>> Instead of showing multiple line items with the same event name and
>> description, show a single line and concatenate all PMUs that this
>> event can belong to.
>>
>> Don't do it for json output. Machine readable output doesn't need to be
>> minimized, and changing the "Unit" field to a list type would break
>> backwards compatibility.
>>
>> Before:
>>   $ perf list -v
>>   ...
>>   br_indirect_spec
>>         [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
>>   br_indirect_spec
>>         [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
>>
>> After:
>>
>>   $ perf list -v
>>   ...
>>   br_indirect_spec
>>         [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]
>>
>> Signed-off-by: James Clark <james.clark@linaro.org>
>> ---
>>   tools/perf/builtin-list.c      |  2 ++
>>   tools/perf/util/pmus.c         | 75 +++++++++++++++++++++++++++++++++++++-----
>>   tools/perf/util/print-events.h |  1 +
>>   3 files changed, 70 insertions(+), 8 deletions(-)
>>
>> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
>> index fed482adb039..aacd7beae2a0 100644
>> --- a/tools/perf/builtin-list.c
>> +++ b/tools/perf/builtin-list.c
>> @@ -516,6 +516,7 @@ int cmd_list(int argc, const char **argv)
>>                  .print_event = default_print_event,
>>                  .print_metric = default_print_metric,
>>                  .skip_duplicate_pmus = default_skip_duplicate_pmus,
>> +               .collapse_events = true
> 
> So collapse_events is put in the callbacks but isn't a callback. I
> think skipping duplicates and collapsing are the same thing, I'd
> prefer it if there weren't two terms for the same thing. If you prefer
> collapsing as a name then default_skip_duplicate_pmus can be
> default_collapse_pmus.
> 

I can use the existing callback and rename it. That does have the effect 
of disabling this behavior in verbose mode which may be useful or may be 
unexpected to some people. But it seems pretty 50/50 so fewer callbacks 
are probably better.

>>          };
>>          const char *cputype = NULL;
>>          const char *unit_name = NULL;
>> @@ -574,6 +575,7 @@ int cmd_list(int argc, const char **argv)
>>                          .print_event = json_print_event,
>>                          .print_metric = json_print_metric,
>>                          .skip_duplicate_pmus = json_skip_duplicate_pmus,
>> +                       .collapse_events = false
>>                  };
>>                  ps = &json_ps;
>>          } else {
>> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
>> index 4d60bac2d2b9..cb1b14ade25b 100644
>> --- a/tools/perf/util/pmus.c
>> +++ b/tools/perf/util/pmus.c
>> @@ -453,17 +453,50 @@ static int cmp_sevent(const void *a, const void *b)
>>          /* Order by PMU name. */
>>          if (as->pmu == bs->pmu)
>>                  return 0;
>> -       return strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
>> +       ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
>> +       if (ret)
>> +               return ret;
>> +
>> +       /* Order by remaining displayed fields for purposes of deduplication later */
>> +       ret = strcmp(as->scale_unit ?: "", bs->scale_unit ?: "");
>> +       if (ret)
>> +               return ret;
>> +       ret = !!as->deprecated - !!bs->deprecated;
>> +       if (ret)
>> +               return ret;
>> +       ret = strcmp(as->desc ?: "", bs->desc ?: "");
>> +       if (ret)
>> +               return ret;
>> +       return strcmp(as->long_desc ?: "", bs->long_desc ?: "");
>>   }
>>
>> -static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
>> +enum dup_type {
>> +       UNIQUE,
>> +       DUPLICATE,
>> +       SAME_TEXT
>> +};
>> +
>> +static enum dup_type pmu_alias_duplicate_type(struct sevent *a, struct sevent *b)
>>   {
>>          /* Different names -> never duplicates */
>>          if (strcmp(a->name ?: "//", b->name ?: "//"))
>> -               return false;
>> +               return UNIQUE;
>> +
>> +       /* Duplicate PMU name and event name -> hide completely */
>> +       if (strcmp(a->pmu_name, b->pmu_name) == 0)
>> +               return DUPLICATE;
>> +
>> +       /* Any other different display text -> not duplicate */
>> +       if (strcmp(a->topic ?: "", b->topic ?: "") ||
>> +           strcmp(a->scale_unit ?: "", b->scale_unit ?: "") ||
>> +           a->deprecated != b->deprecated ||
>> +           strcmp(a->desc ?: "", b->desc ?: "") ||
>> +           strcmp(a->long_desc ?: "", b->long_desc ?: "")) {
>> +               return UNIQUE;
>> +       }
>>
>> -       /* Don't remove duplicates for different PMUs */
>> -       return strcmp(a->pmu_name, b->pmu_name) == 0;
>> +       /* Same display text but different PMU -> collapse */
>> +       return SAME_TEXT;
>>   }
>>
>>   struct events_callback_state {
>> @@ -501,6 +534,21 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
>>          return 0;
>>   }
>>
>> +static void concat_pmu_names(char *pmu_names, size_t size, const char *a, const char *b)
>> +{
>> +       size_t len = strlen(pmu_names);
>> +       size_t added;
>> +
>> +       if (len)
>> +               added = snprintf(pmu_names + len, size - len, ",%s", b);
>> +       else
>> +               added = snprintf(pmu_names, size, "%s,%s", a, b);
>> +
>> +       /* Truncate with ... */
>> +       if (added > 0 && added + len >= size)
>> +               sprintf(pmu_names + size - 4, "...");
>> +}
>> +
>>   void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
>>   {
>>          struct perf_pmu *pmu;
>> @@ -510,6 +558,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>          struct events_callback_state state;
>>          bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
>>          struct perf_pmu *(*scan_fn)(struct perf_pmu *);
>> +       char pmu_names[128] = {0};
>>
>>          if (skip_duplicate_pmus)
>>                  scan_fn = perf_pmus__scan_skip_duplicates;
>> @@ -539,12 +588,21 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>          qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
>>          for (int j = 0; j < len; j++) {
>>                  /* Skip duplicates */
>> -               if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
>> -                       goto free;
>> +               if (j < len - 1) {
>> +                       enum dup_type dt = pmu_alias_duplicate_type(&aliases[j], &aliases[j + 1]);
>> +
>> +                       if (dt == DUPLICATE) {
>> +                               goto free;
>> +                       } else if (print_cb->collapse_events && dt == SAME_TEXT) {
>> +                               concat_pmu_names(pmu_names, sizeof(pmu_names),
>> +                                                aliases[j].pmu_name, aliases[j+1].pmu_name);
>> +                               goto free;
>> +                       }
>> +               }
> 
> I think a label called 'free' is a little unfortunate given the
> function called free.
> When I did things with sevent I was bringing over previous `perf list`
> code mainly so that the perf list output before and after the changes
> was identical. I wonder if this logic would be better placed in the
> callback like default_print_event which already maintains state
> (last_topic) to optionally print different things. This may better
> encapsulate the behavior rather than the logic being in the PMU code.
> 

Yeah I can have a look at putting it in one of the callbacks. But in the 
end builtin-list.c is the only user of perf_pmus__print_pmu_events() 
anyway so makes you wonder if the whole thing can be moved to 
builtin-list and open coded without the callbackyness.

>>
>>                  print_cb->print_event(print_state,
>>                                  aliases[j].topic,
>> -                               aliases[j].pmu_name,
>> +                               pmu_names[0] ? pmu_names : aliases[j].pmu_name,
>>                                  aliases[j].name,
>>                                  aliases[j].alias,
>>                                  aliases[j].scale_unit,
>> @@ -553,6 +611,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>                                  aliases[j].desc,
>>                                  aliases[j].long_desc,
>>                                  aliases[j].encoding_desc);
> 
> The encoding_desc will have a PMU with the suffix removed as per
> skipping duplicates:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/pmu.c?h=perf-tools-next#n1954
> So I think something like:
> ```
>    br_mis_pred_retired
>         [Instruction architecturally executed,mispredicted branch. Unit:
>          armv9_cortex_a510,armv9_cortex_a710]
> ```
> Would have an encoding of `armv9_cortex_a510/.../` without the a710
> encoding being present. That said, I'm not sure anyone cares :-)
> 
> Thanks,
> Ian
> 

Ah, in that case I'll disable it for --detailed as well as --json. I 
could compare encoding_desc in pmu_alias_duplicate_type() but they'll 
always be different because of the PMU name, so there's no point. And 
displaying multiple encoding_descs would be fiddly too.

>> +               pmu_names[0] = '\0';
>>   free:
>>                  zfree(&aliases[j].name);
>>                  zfree(&aliases[j].alias);
>> diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
>> index 445efa1636c1..e91f9f830a2a 100644
>> --- a/tools/perf/util/print-events.h
>> +++ b/tools/perf/util/print-events.h
>> @@ -27,6 +27,7 @@ struct print_callbacks {
>>                          const char *threshold,
>>                          const char *unit);
>>          bool (*skip_duplicate_pmus)(void *print_state);
>> +       bool collapse_events;
>>   };
>>
>>   /** Print all events, the default when no options are specified. */
>>
>> --
>> 2.34.1
>>

Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by Ian Rogers 11 months, 1 week ago
On Fri, Mar 7, 2025 at 6:08 AM James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 05/03/2025 9:40 pm, Ian Rogers wrote:
> > On Tue, Mar 4, 2025 at 5:50 AM James Clark <james.clark@linaro.org> wrote:
> >>
> >> Instead of showing multiple line items with the same event name and
> >> description, show a single line and concatenate all PMUs that this
> >> event can belong to.
> >>
> >> Don't do it for json output. Machine readable output doesn't need to be
> >> minimized, and changing the "Unit" field to a list type would break
> >> backwards compatibility.
> >>
> >> Before:
> >>   $ perf list -v
> >>   ...
> >>   br_indirect_spec
> >>         [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
> >>   br_indirect_spec
> >>         [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
> >>
> >> After:
> >>
> >>   $ perf list -v
> >>   ...
> >>   br_indirect_spec
> >>         [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]
> >>
> >> Signed-off-by: James Clark <james.clark@linaro.org>
> >> ---
> >>   tools/perf/builtin-list.c      |  2 ++
> >>   tools/perf/util/pmus.c         | 75 +++++++++++++++++++++++++++++++++++++-----
> >>   tools/perf/util/print-events.h |  1 +
> >>   3 files changed, 70 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
> >> index fed482adb039..aacd7beae2a0 100644
> >> --- a/tools/perf/builtin-list.c
> >> +++ b/tools/perf/builtin-list.c
> >> @@ -516,6 +516,7 @@ int cmd_list(int argc, const char **argv)
> >>                  .print_event = default_print_event,
> >>                  .print_metric = default_print_metric,
> >>                  .skip_duplicate_pmus = default_skip_duplicate_pmus,
> >> +               .collapse_events = true
> >
> > So collapse_events is put in the callbacks but isn't a callback. I
> > think skipping duplicates and collapsing are the same thing, I'd
> > prefer it if there weren't two terms for the same thing. If you prefer
> > collapsing as a name then default_skip_duplicate_pmus can be
> > default_collapse_pmus.
> >
>
> I can use the existing callback and rename it. That does have the effect
> of disabling this behavior in verbose mode which may be useful or may be
> unexpected to some people. But it seems pretty 50/50 so fewer callbacks
> are probably better.
>
> >>          };
> >>          const char *cputype = NULL;
> >>          const char *unit_name = NULL;
> >> @@ -574,6 +575,7 @@ int cmd_list(int argc, const char **argv)
> >>                          .print_event = json_print_event,
> >>                          .print_metric = json_print_metric,
> >>                          .skip_duplicate_pmus = json_skip_duplicate_pmus,
> >> +                       .collapse_events = false
> >>                  };
> >>                  ps = &json_ps;
> >>          } else {
> >> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
> >> index 4d60bac2d2b9..cb1b14ade25b 100644
> >> --- a/tools/perf/util/pmus.c
> >> +++ b/tools/perf/util/pmus.c
> >> @@ -453,17 +453,50 @@ static int cmp_sevent(const void *a, const void *b)
> >>          /* Order by PMU name. */
> >>          if (as->pmu == bs->pmu)
> >>                  return 0;
> >> -       return strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
> >> +       ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
> >> +       if (ret)
> >> +               return ret;
> >> +
> >> +       /* Order by remaining displayed fields for purposes of deduplication later */
> >> +       ret = strcmp(as->scale_unit ?: "", bs->scale_unit ?: "");
> >> +       if (ret)
> >> +               return ret;
> >> +       ret = !!as->deprecated - !!bs->deprecated;
> >> +       if (ret)
> >> +               return ret;
> >> +       ret = strcmp(as->desc ?: "", bs->desc ?: "");
> >> +       if (ret)
> >> +               return ret;
> >> +       return strcmp(as->long_desc ?: "", bs->long_desc ?: "");
> >>   }
> >>
> >> -static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
> >> +enum dup_type {
> >> +       UNIQUE,
> >> +       DUPLICATE,
> >> +       SAME_TEXT
> >> +};
> >> +
> >> +static enum dup_type pmu_alias_duplicate_type(struct sevent *a, struct sevent *b)
> >>   {
> >>          /* Different names -> never duplicates */
> >>          if (strcmp(a->name ?: "//", b->name ?: "//"))
> >> -               return false;
> >> +               return UNIQUE;
> >> +
> >> +       /* Duplicate PMU name and event name -> hide completely */
> >> +       if (strcmp(a->pmu_name, b->pmu_name) == 0)
> >> +               return DUPLICATE;
> >> +
> >> +       /* Any other different display text -> not duplicate */
> >> +       if (strcmp(a->topic ?: "", b->topic ?: "") ||
> >> +           strcmp(a->scale_unit ?: "", b->scale_unit ?: "") ||
> >> +           a->deprecated != b->deprecated ||
> >> +           strcmp(a->desc ?: "", b->desc ?: "") ||
> >> +           strcmp(a->long_desc ?: "", b->long_desc ?: "")) {
> >> +               return UNIQUE;
> >> +       }
> >>
> >> -       /* Don't remove duplicates for different PMUs */
> >> -       return strcmp(a->pmu_name, b->pmu_name) == 0;
> >> +       /* Same display text but different PMU -> collapse */
> >> +       return SAME_TEXT;
> >>   }
> >>
> >>   struct events_callback_state {
> >> @@ -501,6 +534,21 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
> >>          return 0;
> >>   }
> >>
> >> +static void concat_pmu_names(char *pmu_names, size_t size, const char *a, const char *b)
> >> +{
> >> +       size_t len = strlen(pmu_names);
> >> +       size_t added;
> >> +
> >> +       if (len)
> >> +               added = snprintf(pmu_names + len, size - len, ",%s", b);
> >> +       else
> >> +               added = snprintf(pmu_names, size, "%s,%s", a, b);
> >> +
> >> +       /* Truncate with ... */
> >> +       if (added > 0 && added + len >= size)
> >> +               sprintf(pmu_names + size - 4, "...");
> >> +}
> >> +
> >>   void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
> >>   {
> >>          struct perf_pmu *pmu;
> >> @@ -510,6 +558,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
> >>          struct events_callback_state state;
> >>          bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
> >>          struct perf_pmu *(*scan_fn)(struct perf_pmu *);
> >> +       char pmu_names[128] = {0};
> >>
> >>          if (skip_duplicate_pmus)
> >>                  scan_fn = perf_pmus__scan_skip_duplicates;
> >> @@ -539,12 +588,21 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
> >>          qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
> >>          for (int j = 0; j < len; j++) {
> >>                  /* Skip duplicates */
> >> -               if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
> >> -                       goto free;
> >> +               if (j < len - 1) {
> >> +                       enum dup_type dt = pmu_alias_duplicate_type(&aliases[j], &aliases[j + 1]);
> >> +
> >> +                       if (dt == DUPLICATE) {
> >> +                               goto free;
> >> +                       } else if (print_cb->collapse_events && dt == SAME_TEXT) {
> >> +                               concat_pmu_names(pmu_names, sizeof(pmu_names),
> >> +                                                aliases[j].pmu_name, aliases[j+1].pmu_name);
> >> +                               goto free;
> >> +                       }
> >> +               }
> >
> > I think a label called 'free' is a little unfortunate given the
> > function called free.
> > When I did things with sevent I was bringing over previous `perf list`
> > code mainly so that the perf list output before and after the changes
> > was identical. I wonder if this logic would be better placed in the
> > callback like default_print_event which already maintains state
> > (last_topic) to optionally print different things. This may better
> > encapsulate the behavior rather than the logic being in the PMU code.
> >
>
> Yeah I can have a look at putting it in one of the callbacks. But in the
> end builtin-list.c is the only user of perf_pmus__print_pmu_events()
> anyway so makes you wonder if the whole thing can be moved to
> builtin-list and open coded without the callbackyness.

I wanted to hide some of the innards of pmus, so I think that's the
reason it is as it is. The whole `sevent` was pre-existing and
maintained so that the output was the same.

> >>
> >>                  print_cb->print_event(print_state,
> >>                                  aliases[j].topic,
> >> -                               aliases[j].pmu_name,
> >> +                               pmu_names[0] ? pmu_names : aliases[j].pmu_name,
> >>                                  aliases[j].name,
> >>                                  aliases[j].alias,
> >>                                  aliases[j].scale_unit,
> >> @@ -553,6 +611,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
> >>                                  aliases[j].desc,
> >>                                  aliases[j].long_desc,
> >>                                  aliases[j].encoding_desc);
> >
> > The encoding_desc will have a PMU with the suffix removed as per
> > skipping duplicates:
> > https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/pmu.c?h=perf-tools-next#n1954
> > So I think something like:
> > ```
> >    br_mis_pred_retired
> >         [Instruction architecturally executed,mispredicted branch. Unit:
> >          armv9_cortex_a510,armv9_cortex_a710]
> > ```
> > Would have an encoding of `armv9_cortex_a510/.../` without the a710
> > encoding being present. That said, I'm not sure anyone cares :-)
> >
> > Thanks,
> > Ian
> >
>
> Ah, in that case I'll disable it for --detailed as well as --json. I
> could compare encoding_desc in pmu_alias_duplicate_type() but they'll
> always be different because of the PMU name, so there's no point. And
> displaying multiple encoding_descs would be fiddly too.

So I'd like not to have the encoding_desc removed. It is useful for
debugging.. I meant by people not caring that the format of that
string is under specified, so the PMU name not being 100% accurate
likely doesn't affect people.

Thanks,
Ian


> >> +               pmu_names[0] = '\0';
> >>   free:
> >>                  zfree(&aliases[j].name);
> >>                  zfree(&aliases[j].alias);
> >> diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
> >> index 445efa1636c1..e91f9f830a2a 100644
> >> --- a/tools/perf/util/print-events.h
> >> +++ b/tools/perf/util/print-events.h
> >> @@ -27,6 +27,7 @@ struct print_callbacks {
> >>                          const char *threshold,
> >>                          const char *unit);
> >>          bool (*skip_duplicate_pmus)(void *print_state);
> >> +       bool collapse_events;
> >>   };
> >>
> >>   /** Print all events, the default when no options are specified. */
> >>
> >> --
> >> 2.34.1
> >>
>
Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by James Clark 11 months ago

On 07/03/2025 5:35 pm, Ian Rogers wrote:
> On Fri, Mar 7, 2025 at 6:08 AM James Clark <james.clark@linaro.org> wrote:
>>
>>
>>
>> On 05/03/2025 9:40 pm, Ian Rogers wrote:
>>> On Tue, Mar 4, 2025 at 5:50 AM James Clark <james.clark@linaro.org> wrote:
>>>>
>>>> Instead of showing multiple line items with the same event name and
>>>> description, show a single line and concatenate all PMUs that this
>>>> event can belong to.
>>>>
>>>> Don't do it for json output. Machine readable output doesn't need to be
>>>> minimized, and changing the "Unit" field to a list type would break
>>>> backwards compatibility.
>>>>
>>>> Before:
>>>>    $ perf list -v
>>>>    ...
>>>>    br_indirect_spec
>>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
>>>>    br_indirect_spec
>>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
>>>>
>>>> After:
>>>>
>>>>    $ perf list -v
>>>>    ...
>>>>    br_indirect_spec
>>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]
>>>>
>>>> Signed-off-by: James Clark <james.clark@linaro.org>
>>>> ---
>>>>    tools/perf/builtin-list.c      |  2 ++
>>>>    tools/perf/util/pmus.c         | 75 +++++++++++++++++++++++++++++++++++++-----
>>>>    tools/perf/util/print-events.h |  1 +
>>>>    3 files changed, 70 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
>>>> index fed482adb039..aacd7beae2a0 100644
>>>> --- a/tools/perf/builtin-list.c
>>>> +++ b/tools/perf/builtin-list.c
>>>> @@ -516,6 +516,7 @@ int cmd_list(int argc, const char **argv)
>>>>                   .print_event = default_print_event,
>>>>                   .print_metric = default_print_metric,
>>>>                   .skip_duplicate_pmus = default_skip_duplicate_pmus,
>>>> +               .collapse_events = true
>>>
>>> So collapse_events is put in the callbacks but isn't a callback. I
>>> think skipping duplicates and collapsing are the same thing, I'd
>>> prefer it if there weren't two terms for the same thing. If you prefer
>>> collapsing as a name then default_skip_duplicate_pmus can be
>>> default_collapse_pmus.
>>>
>>
>> I can use the existing callback and rename it. That does have the effect
>> of disabling this behavior in verbose mode which may be useful or may be
>> unexpected to some people. But it seems pretty 50/50 so fewer callbacks
>> are probably better.
>>
>>>>           };
>>>>           const char *cputype = NULL;
>>>>           const char *unit_name = NULL;
>>>> @@ -574,6 +575,7 @@ int cmd_list(int argc, const char **argv)
>>>>                           .print_event = json_print_event,
>>>>                           .print_metric = json_print_metric,
>>>>                           .skip_duplicate_pmus = json_skip_duplicate_pmus,
>>>> +                       .collapse_events = false
>>>>                   };
>>>>                   ps = &json_ps;
>>>>           } else {
>>>> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
>>>> index 4d60bac2d2b9..cb1b14ade25b 100644
>>>> --- a/tools/perf/util/pmus.c
>>>> +++ b/tools/perf/util/pmus.c
>>>> @@ -453,17 +453,50 @@ static int cmp_sevent(const void *a, const void *b)
>>>>           /* Order by PMU name. */
>>>>           if (as->pmu == bs->pmu)
>>>>                   return 0;
>>>> -       return strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
>>>> +       ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
>>>> +       if (ret)
>>>> +               return ret;
>>>> +
>>>> +       /* Order by remaining displayed fields for purposes of deduplication later */
>>>> +       ret = strcmp(as->scale_unit ?: "", bs->scale_unit ?: "");
>>>> +       if (ret)
>>>> +               return ret;
>>>> +       ret = !!as->deprecated - !!bs->deprecated;
>>>> +       if (ret)
>>>> +               return ret;
>>>> +       ret = strcmp(as->desc ?: "", bs->desc ?: "");
>>>> +       if (ret)
>>>> +               return ret;
>>>> +       return strcmp(as->long_desc ?: "", bs->long_desc ?: "");
>>>>    }
>>>>
>>>> -static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
>>>> +enum dup_type {
>>>> +       UNIQUE,
>>>> +       DUPLICATE,
>>>> +       SAME_TEXT
>>>> +};
>>>> +
>>>> +static enum dup_type pmu_alias_duplicate_type(struct sevent *a, struct sevent *b)
>>>>    {
>>>>           /* Different names -> never duplicates */
>>>>           if (strcmp(a->name ?: "//", b->name ?: "//"))
>>>> -               return false;
>>>> +               return UNIQUE;
>>>> +
>>>> +       /* Duplicate PMU name and event name -> hide completely */
>>>> +       if (strcmp(a->pmu_name, b->pmu_name) == 0)
>>>> +               return DUPLICATE;
>>>> +
>>>> +       /* Any other different display text -> not duplicate */
>>>> +       if (strcmp(a->topic ?: "", b->topic ?: "") ||
>>>> +           strcmp(a->scale_unit ?: "", b->scale_unit ?: "") ||
>>>> +           a->deprecated != b->deprecated ||
>>>> +           strcmp(a->desc ?: "", b->desc ?: "") ||
>>>> +           strcmp(a->long_desc ?: "", b->long_desc ?: "")) {
>>>> +               return UNIQUE;
>>>> +       }
>>>>
>>>> -       /* Don't remove duplicates for different PMUs */
>>>> -       return strcmp(a->pmu_name, b->pmu_name) == 0;
>>>> +       /* Same display text but different PMU -> collapse */
>>>> +       return SAME_TEXT;
>>>>    }
>>>>
>>>>    struct events_callback_state {
>>>> @@ -501,6 +534,21 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
>>>>           return 0;
>>>>    }
>>>>
>>>> +static void concat_pmu_names(char *pmu_names, size_t size, const char *a, const char *b)
>>>> +{
>>>> +       size_t len = strlen(pmu_names);
>>>> +       size_t added;
>>>> +
>>>> +       if (len)
>>>> +               added = snprintf(pmu_names + len, size - len, ",%s", b);
>>>> +       else
>>>> +               added = snprintf(pmu_names, size, "%s,%s", a, b);
>>>> +
>>>> +       /* Truncate with ... */
>>>> +       if (added > 0 && added + len >= size)
>>>> +               sprintf(pmu_names + size - 4, "...");
>>>> +}
>>>> +
>>>>    void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
>>>>    {
>>>>           struct perf_pmu *pmu;
>>>> @@ -510,6 +558,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>>>           struct events_callback_state state;
>>>>           bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
>>>>           struct perf_pmu *(*scan_fn)(struct perf_pmu *);
>>>> +       char pmu_names[128] = {0};
>>>>
>>>>           if (skip_duplicate_pmus)
>>>>                   scan_fn = perf_pmus__scan_skip_duplicates;
>>>> @@ -539,12 +588,21 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>>>           qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
>>>>           for (int j = 0; j < len; j++) {
>>>>                   /* Skip duplicates */
>>>> -               if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
>>>> -                       goto free;
>>>> +               if (j < len - 1) {
>>>> +                       enum dup_type dt = pmu_alias_duplicate_type(&aliases[j], &aliases[j + 1]);
>>>> +
>>>> +                       if (dt == DUPLICATE) {
>>>> +                               goto free;
>>>> +                       } else if (print_cb->collapse_events && dt == SAME_TEXT) {
>>>> +                               concat_pmu_names(pmu_names, sizeof(pmu_names),
>>>> +                                                aliases[j].pmu_name, aliases[j+1].pmu_name);
>>>> +                               goto free;
>>>> +                       }
>>>> +               }
>>>
>>> I think a label called 'free' is a little unfortunate given the
>>> function called free.
>>> When I did things with sevent I was bringing over previous `perf list`
>>> code mainly so that the perf list output before and after the changes
>>> was identical. I wonder if this logic would be better placed in the
>>> callback like default_print_event which already maintains state
>>> (last_topic) to optionally print different things. This may better
>>> encapsulate the behavior rather than the logic being in the PMU code.
>>>
>>
>> Yeah I can have a look at putting it in one of the callbacks. But in the
>> end builtin-list.c is the only user of perf_pmus__print_pmu_events()
>> anyway so makes you wonder if the whole thing can be moved to
>> builtin-list and open coded without the callbackyness.
> 
> I wanted to hide some of the innards of pmus, so I think that's the
> reason it is as it is. The whole `sevent` was pre-existing and
> maintained so that the output was the same.
> 

Looking at this a bit more it is possible to move all of 
perf_pmus__print_pmu_events() (including its dependencies and perf list 
specifics) out of pmus.c into print-events.c without exposing any new 
innards of pmus. Only struct pmu_event_info would be used, but that's 
not private and is already used elsewhere.

It's difficult to do this change only in the print_event callback 
because it relies on having the _next_ event, not the previous event. 
We're already tracking last_topic, and if we start passing all the 
next_* items too it gets a bit unmanageable.

If it's all moved then doing display specific processing across the 
whole list becomes quite easy.

Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by Ian Rogers 11 months ago
On Tue, Mar 11, 2025 at 7:13 AM James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 07/03/2025 5:35 pm, Ian Rogers wrote:
> > On Fri, Mar 7, 2025 at 6:08 AM James Clark <james.clark@linaro.org> wrote:
> >>
> >>
> >>
> >> On 05/03/2025 9:40 pm, Ian Rogers wrote:
> >>> On Tue, Mar 4, 2025 at 5:50 AM James Clark <james.clark@linaro.org> wrote:
> >>>>
> >>>> Instead of showing multiple line items with the same event name and
> >>>> description, show a single line and concatenate all PMUs that this
> >>>> event can belong to.
> >>>>
> >>>> Don't do it for json output. Machine readable output doesn't need to be
> >>>> minimized, and changing the "Unit" field to a list type would break
> >>>> backwards compatibility.
> >>>>
> >>>> Before:
> >>>>    $ perf list -v
> >>>>    ...
> >>>>    br_indirect_spec
> >>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
> >>>>    br_indirect_spec
> >>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
> >>>>
> >>>> After:
> >>>>
> >>>>    $ perf list -v
> >>>>    ...
> >>>>    br_indirect_spec
> >>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]
> >>>>
> >>>> Signed-off-by: James Clark <james.clark@linaro.org>
> >>>> ---
> >>>>    tools/perf/builtin-list.c      |  2 ++
> >>>>    tools/perf/util/pmus.c         | 75 +++++++++++++++++++++++++++++++++++++-----
> >>>>    tools/perf/util/print-events.h |  1 +
> >>>>    3 files changed, 70 insertions(+), 8 deletions(-)
> >>>>
> >>>> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
> >>>> index fed482adb039..aacd7beae2a0 100644
> >>>> --- a/tools/perf/builtin-list.c
> >>>> +++ b/tools/perf/builtin-list.c
> >>>> @@ -516,6 +516,7 @@ int cmd_list(int argc, const char **argv)
> >>>>                   .print_event = default_print_event,
> >>>>                   .print_metric = default_print_metric,
> >>>>                   .skip_duplicate_pmus = default_skip_duplicate_pmus,
> >>>> +               .collapse_events = true
> >>>
> >>> So collapse_events is put in the callbacks but isn't a callback. I
> >>> think skipping duplicates and collapsing are the same thing, I'd
> >>> prefer it if there weren't two terms for the same thing. If you prefer
> >>> collapsing as a name then default_skip_duplicate_pmus can be
> >>> default_collapse_pmus.
> >>>
> >>
> >> I can use the existing callback and rename it. That does have the effect
> >> of disabling this behavior in verbose mode which may be useful or may be
> >> unexpected to some people. But it seems pretty 50/50 so fewer callbacks
> >> are probably better.
> >>
> >>>>           };
> >>>>           const char *cputype = NULL;
> >>>>           const char *unit_name = NULL;
> >>>> @@ -574,6 +575,7 @@ int cmd_list(int argc, const char **argv)
> >>>>                           .print_event = json_print_event,
> >>>>                           .print_metric = json_print_metric,
> >>>>                           .skip_duplicate_pmus = json_skip_duplicate_pmus,
> >>>> +                       .collapse_events = false
> >>>>                   };
> >>>>                   ps = &json_ps;
> >>>>           } else {
> >>>> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
> >>>> index 4d60bac2d2b9..cb1b14ade25b 100644
> >>>> --- a/tools/perf/util/pmus.c
> >>>> +++ b/tools/perf/util/pmus.c
> >>>> @@ -453,17 +453,50 @@ static int cmp_sevent(const void *a, const void *b)
> >>>>           /* Order by PMU name. */
> >>>>           if (as->pmu == bs->pmu)
> >>>>                   return 0;
> >>>> -       return strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
> >>>> +       ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
> >>>> +       if (ret)
> >>>> +               return ret;
> >>>> +
> >>>> +       /* Order by remaining displayed fields for purposes of deduplication later */
> >>>> +       ret = strcmp(as->scale_unit ?: "", bs->scale_unit ?: "");
> >>>> +       if (ret)
> >>>> +               return ret;
> >>>> +       ret = !!as->deprecated - !!bs->deprecated;
> >>>> +       if (ret)
> >>>> +               return ret;
> >>>> +       ret = strcmp(as->desc ?: "", bs->desc ?: "");
> >>>> +       if (ret)
> >>>> +               return ret;
> >>>> +       return strcmp(as->long_desc ?: "", bs->long_desc ?: "");
> >>>>    }
> >>>>
> >>>> -static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
> >>>> +enum dup_type {
> >>>> +       UNIQUE,
> >>>> +       DUPLICATE,
> >>>> +       SAME_TEXT
> >>>> +};
> >>>> +
> >>>> +static enum dup_type pmu_alias_duplicate_type(struct sevent *a, struct sevent *b)
> >>>>    {
> >>>>           /* Different names -> never duplicates */
> >>>>           if (strcmp(a->name ?: "//", b->name ?: "//"))
> >>>> -               return false;
> >>>> +               return UNIQUE;
> >>>> +
> >>>> +       /* Duplicate PMU name and event name -> hide completely */
> >>>> +       if (strcmp(a->pmu_name, b->pmu_name) == 0)
> >>>> +               return DUPLICATE;
> >>>> +
> >>>> +       /* Any other different display text -> not duplicate */
> >>>> +       if (strcmp(a->topic ?: "", b->topic ?: "") ||
> >>>> +           strcmp(a->scale_unit ?: "", b->scale_unit ?: "") ||
> >>>> +           a->deprecated != b->deprecated ||
> >>>> +           strcmp(a->desc ?: "", b->desc ?: "") ||
> >>>> +           strcmp(a->long_desc ?: "", b->long_desc ?: "")) {
> >>>> +               return UNIQUE;
> >>>> +       }
> >>>>
> >>>> -       /* Don't remove duplicates for different PMUs */
> >>>> -       return strcmp(a->pmu_name, b->pmu_name) == 0;
> >>>> +       /* Same display text but different PMU -> collapse */
> >>>> +       return SAME_TEXT;
> >>>>    }
> >>>>
> >>>>    struct events_callback_state {
> >>>> @@ -501,6 +534,21 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
> >>>>           return 0;
> >>>>    }
> >>>>
> >>>> +static void concat_pmu_names(char *pmu_names, size_t size, const char *a, const char *b)
> >>>> +{
> >>>> +       size_t len = strlen(pmu_names);
> >>>> +       size_t added;
> >>>> +
> >>>> +       if (len)
> >>>> +               added = snprintf(pmu_names + len, size - len, ",%s", b);
> >>>> +       else
> >>>> +               added = snprintf(pmu_names, size, "%s,%s", a, b);
> >>>> +
> >>>> +       /* Truncate with ... */
> >>>> +       if (added > 0 && added + len >= size)
> >>>> +               sprintf(pmu_names + size - 4, "...");
> >>>> +}
> >>>> +
> >>>>    void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
> >>>>    {
> >>>>           struct perf_pmu *pmu;
> >>>> @@ -510,6 +558,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
> >>>>           struct events_callback_state state;
> >>>>           bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
> >>>>           struct perf_pmu *(*scan_fn)(struct perf_pmu *);
> >>>> +       char pmu_names[128] = {0};
> >>>>
> >>>>           if (skip_duplicate_pmus)
> >>>>                   scan_fn = perf_pmus__scan_skip_duplicates;
> >>>> @@ -539,12 +588,21 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
> >>>>           qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
> >>>>           for (int j = 0; j < len; j++) {
> >>>>                   /* Skip duplicates */
> >>>> -               if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
> >>>> -                       goto free;
> >>>> +               if (j < len - 1) {
> >>>> +                       enum dup_type dt = pmu_alias_duplicate_type(&aliases[j], &aliases[j + 1]);
> >>>> +
> >>>> +                       if (dt == DUPLICATE) {
> >>>> +                               goto free;
> >>>> +                       } else if (print_cb->collapse_events && dt == SAME_TEXT) {
> >>>> +                               concat_pmu_names(pmu_names, sizeof(pmu_names),
> >>>> +                                                aliases[j].pmu_name, aliases[j+1].pmu_name);
> >>>> +                               goto free;
> >>>> +                       }
> >>>> +               }
> >>>
> >>> I think a label called 'free' is a little unfortunate given the
> >>> function called free.
> >>> When I did things with sevent I was bringing over previous `perf list`
> >>> code mainly so that the perf list output before and after the changes
> >>> was identical. I wonder if this logic would be better placed in the
> >>> callback like default_print_event which already maintains state
> >>> (last_topic) to optionally print different things. This may better
> >>> encapsulate the behavior rather than the logic being in the PMU code.
> >>>
> >>
> >> Yeah I can have a look at putting it in one of the callbacks. But in the
> >> end builtin-list.c is the only user of perf_pmus__print_pmu_events()
> >> anyway so makes you wonder if the whole thing can be moved to
> >> builtin-list and open coded without the callbackyness.
> >
> > I wanted to hide some of the innards of pmus, so I think that's the
> > reason it is as it is. The whole `sevent` was pre-existing and
> > maintained so that the output was the same.
> >
>
> Looking at this a bit more it is possible to move all of
> perf_pmus__print_pmu_events() (including its dependencies and perf list
> specifics) out of pmus.c into print-events.c without exposing any new
> innards of pmus. Only struct pmu_event_info would be used, but that's
> not private and is already used elsewhere.
>
> It's difficult to do this change only in the print_event callback
> because it relies on having the _next_ event, not the previous event.
> We're already tracking last_topic, and if we start passing all the
> next_* items too it gets a bit unmanageable.
>
> If it's all moved then doing display specific processing across the
> whole list becomes quite easy.

I'm not sure I follow all of this. If things can be moved to a more
obvious location, printing code in print-events.c, and we maintain
encapsulation then that sounds great to me. I'm not clear on the next
event issue. My hope with the print routines in builtin-list.c was
that anyone could come along and add another for whatever rendering
took their fancy. I didn't want it to be like the logic in
stat-display.c, where there are multiple levels of call backs, global
state, odd things like passing NULL meaning display column headers,
and the whole thing being a confusing rats nest where a change nearly
always ripples through and breaks something somewhere. Particularly
compare the json output in stat-display.c and builtin-list.c, my hope
was that builtin-list.c would be the model for reimplementing the
stat-display.c one. Others may have different opinions.

Thanks,
Ian
Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by James Clark 11 months ago

On 11/03/2025 3:14 pm, Ian Rogers wrote:
> On Tue, Mar 11, 2025 at 7:13 AM James Clark <james.clark@linaro.org> wrote:
>>
>>
>>
>> On 07/03/2025 5:35 pm, Ian Rogers wrote:
>>> On Fri, Mar 7, 2025 at 6:08 AM James Clark <james.clark@linaro.org> wrote:
>>>>
>>>>
>>>>
>>>> On 05/03/2025 9:40 pm, Ian Rogers wrote:
>>>>> On Tue, Mar 4, 2025 at 5:50 AM James Clark <james.clark@linaro.org> wrote:
>>>>>>
>>>>>> Instead of showing multiple line items with the same event name and
>>>>>> description, show a single line and concatenate all PMUs that this
>>>>>> event can belong to.
>>>>>>
>>>>>> Don't do it for json output. Machine readable output doesn't need to be
>>>>>> minimized, and changing the "Unit" field to a list type would break
>>>>>> backwards compatibility.
>>>>>>
>>>>>> Before:
>>>>>>     $ perf list -v
>>>>>>     ...
>>>>>>     br_indirect_spec
>>>>>>           [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
>>>>>>     br_indirect_spec
>>>>>>           [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
>>>>>>
>>>>>> After:
>>>>>>
>>>>>>     $ perf list -v
>>>>>>     ...
>>>>>>     br_indirect_spec
>>>>>>           [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]
>>>>>>
>>>>>> Signed-off-by: James Clark <james.clark@linaro.org>
>>>>>> ---
>>>>>>     tools/perf/builtin-list.c      |  2 ++
>>>>>>     tools/perf/util/pmus.c         | 75 +++++++++++++++++++++++++++++++++++++-----
>>>>>>     tools/perf/util/print-events.h |  1 +
>>>>>>     3 files changed, 70 insertions(+), 8 deletions(-)
>>>>>>
>>>>>> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
>>>>>> index fed482adb039..aacd7beae2a0 100644
>>>>>> --- a/tools/perf/builtin-list.c
>>>>>> +++ b/tools/perf/builtin-list.c
>>>>>> @@ -516,6 +516,7 @@ int cmd_list(int argc, const char **argv)
>>>>>>                    .print_event = default_print_event,
>>>>>>                    .print_metric = default_print_metric,
>>>>>>                    .skip_duplicate_pmus = default_skip_duplicate_pmus,
>>>>>> +               .collapse_events = true
>>>>>
>>>>> So collapse_events is put in the callbacks but isn't a callback. I
>>>>> think skipping duplicates and collapsing are the same thing, I'd
>>>>> prefer it if there weren't two terms for the same thing. If you prefer
>>>>> collapsing as a name then default_skip_duplicate_pmus can be
>>>>> default_collapse_pmus.
>>>>>
>>>>
>>>> I can use the existing callback and rename it. That does have the effect
>>>> of disabling this behavior in verbose mode which may be useful or may be
>>>> unexpected to some people. But it seems pretty 50/50 so fewer callbacks
>>>> are probably better.
>>>>
>>>>>>            };
>>>>>>            const char *cputype = NULL;
>>>>>>            const char *unit_name = NULL;
>>>>>> @@ -574,6 +575,7 @@ int cmd_list(int argc, const char **argv)
>>>>>>                            .print_event = json_print_event,
>>>>>>                            .print_metric = json_print_metric,
>>>>>>                            .skip_duplicate_pmus = json_skip_duplicate_pmus,
>>>>>> +                       .collapse_events = false
>>>>>>                    };
>>>>>>                    ps = &json_ps;
>>>>>>            } else {
>>>>>> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
>>>>>> index 4d60bac2d2b9..cb1b14ade25b 100644
>>>>>> --- a/tools/perf/util/pmus.c
>>>>>> +++ b/tools/perf/util/pmus.c
>>>>>> @@ -453,17 +453,50 @@ static int cmp_sevent(const void *a, const void *b)
>>>>>>            /* Order by PMU name. */
>>>>>>            if (as->pmu == bs->pmu)
>>>>>>                    return 0;
>>>>>> -       return strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
>>>>>> +       ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
>>>>>> +       if (ret)
>>>>>> +               return ret;
>>>>>> +
>>>>>> +       /* Order by remaining displayed fields for purposes of deduplication later */
>>>>>> +       ret = strcmp(as->scale_unit ?: "", bs->scale_unit ?: "");
>>>>>> +       if (ret)
>>>>>> +               return ret;
>>>>>> +       ret = !!as->deprecated - !!bs->deprecated;
>>>>>> +       if (ret)
>>>>>> +               return ret;
>>>>>> +       ret = strcmp(as->desc ?: "", bs->desc ?: "");
>>>>>> +       if (ret)
>>>>>> +               return ret;
>>>>>> +       return strcmp(as->long_desc ?: "", bs->long_desc ?: "");
>>>>>>     }
>>>>>>
>>>>>> -static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
>>>>>> +enum dup_type {
>>>>>> +       UNIQUE,
>>>>>> +       DUPLICATE,
>>>>>> +       SAME_TEXT
>>>>>> +};
>>>>>> +
>>>>>> +static enum dup_type pmu_alias_duplicate_type(struct sevent *a, struct sevent *b)
>>>>>>     {
>>>>>>            /* Different names -> never duplicates */
>>>>>>            if (strcmp(a->name ?: "//", b->name ?: "//"))
>>>>>> -               return false;
>>>>>> +               return UNIQUE;
>>>>>> +
>>>>>> +       /* Duplicate PMU name and event name -> hide completely */
>>>>>> +       if (strcmp(a->pmu_name, b->pmu_name) == 0)
>>>>>> +               return DUPLICATE;
>>>>>> +
>>>>>> +       /* Any other different display text -> not duplicate */
>>>>>> +       if (strcmp(a->topic ?: "", b->topic ?: "") ||
>>>>>> +           strcmp(a->scale_unit ?: "", b->scale_unit ?: "") ||
>>>>>> +           a->deprecated != b->deprecated ||
>>>>>> +           strcmp(a->desc ?: "", b->desc ?: "") ||
>>>>>> +           strcmp(a->long_desc ?: "", b->long_desc ?: "")) {
>>>>>> +               return UNIQUE;
>>>>>> +       }
>>>>>>
>>>>>> -       /* Don't remove duplicates for different PMUs */
>>>>>> -       return strcmp(a->pmu_name, b->pmu_name) == 0;
>>>>>> +       /* Same display text but different PMU -> collapse */
>>>>>> +       return SAME_TEXT;
>>>>>>     }
>>>>>>
>>>>>>     struct events_callback_state {
>>>>>> @@ -501,6 +534,21 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
>>>>>>            return 0;
>>>>>>     }
>>>>>>
>>>>>> +static void concat_pmu_names(char *pmu_names, size_t size, const char *a, const char *b)
>>>>>> +{
>>>>>> +       size_t len = strlen(pmu_names);
>>>>>> +       size_t added;
>>>>>> +
>>>>>> +       if (len)
>>>>>> +               added = snprintf(pmu_names + len, size - len, ",%s", b);
>>>>>> +       else
>>>>>> +               added = snprintf(pmu_names, size, "%s,%s", a, b);
>>>>>> +
>>>>>> +       /* Truncate with ... */
>>>>>> +       if (added > 0 && added + len >= size)
>>>>>> +               sprintf(pmu_names + size - 4, "...");
>>>>>> +}
>>>>>> +
>>>>>>     void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
>>>>>>     {
>>>>>>            struct perf_pmu *pmu;
>>>>>> @@ -510,6 +558,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>>>>>            struct events_callback_state state;
>>>>>>            bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
>>>>>>            struct perf_pmu *(*scan_fn)(struct perf_pmu *);
>>>>>> +       char pmu_names[128] = {0};
>>>>>>
>>>>>>            if (skip_duplicate_pmus)
>>>>>>                    scan_fn = perf_pmus__scan_skip_duplicates;
>>>>>> @@ -539,12 +588,21 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>>>>>            qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
>>>>>>            for (int j = 0; j < len; j++) {
>>>>>>                    /* Skip duplicates */
>>>>>> -               if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
>>>>>> -                       goto free;
>>>>>> +               if (j < len - 1) {
>>>>>> +                       enum dup_type dt = pmu_alias_duplicate_type(&aliases[j], &aliases[j + 1]);
>>>>>> +
>>>>>> +                       if (dt == DUPLICATE) {
>>>>>> +                               goto free;
>>>>>> +                       } else if (print_cb->collapse_events && dt == SAME_TEXT) {
>>>>>> +                               concat_pmu_names(pmu_names, sizeof(pmu_names),
>>>>>> +                                                aliases[j].pmu_name, aliases[j+1].pmu_name);
>>>>>> +                               goto free;
>>>>>> +                       }
>>>>>> +               }
>>>>>
>>>>> I think a label called 'free' is a little unfortunate given the
>>>>> function called free.
>>>>> When I did things with sevent I was bringing over previous `perf list`
>>>>> code mainly so that the perf list output before and after the changes
>>>>> was identical. I wonder if this logic would be better placed in the
>>>>> callback like default_print_event which already maintains state
>>>>> (last_topic) to optionally print different things. This may better
>>>>> encapsulate the behavior rather than the logic being in the PMU code.
>>>>>
>>>>
>>>> Yeah I can have a look at putting it in one of the callbacks. But in the
>>>> end builtin-list.c is the only user of perf_pmus__print_pmu_events()
>>>> anyway so makes you wonder if the whole thing can be moved to
>>>> builtin-list and open coded without the callbackyness.
>>>
>>> I wanted to hide some of the innards of pmus, so I think that's the
>>> reason it is as it is. The whole `sevent` was pre-existing and
>>> maintained so that the output was the same.
>>>
>>
>> Looking at this a bit more it is possible to move all of
>> perf_pmus__print_pmu_events() (including its dependencies and perf list
>> specifics) out of pmus.c into print-events.c without exposing any new
>> innards of pmus. Only struct pmu_event_info would be used, but that's
>> not private and is already used elsewhere.
>>
>> It's difficult to do this change only in the print_event callback
>> because it relies on having the _next_ event, not the previous event.
>> We're already tracking last_topic, and if we start passing all the
>> next_* items too it gets a bit unmanageable.
>>
>> If it's all moved then doing display specific processing across the
>> whole list becomes quite easy.
> 
> I'm not sure I follow all of this. If things can be moved to a more
> obvious location, printing code in print-events.c, and we maintain
> encapsulation then that sounds great to me.

I'll give it a go, I think I can come up with something.

> I'm not clear on the next event issue.

For example, pmu_alias_is_duplicate() needs the current and next event, 
so it's not easy to move that behavior to builtin-list.c. And my change 
to collapse similar events across PMUs also requires the next rather 
than previous event.

> My hope with the print routines in builtin-list.c was
> that anyone could come along and add another for whatever rendering
> took their fancy. I didn't want it to be like the logic in
> stat-display.c, where there are multiple levels of call backs, global
> state, odd things like passing NULL meaning display column headers,
> and the whole thing being a confusing rats nest where a change nearly
> always ripples through and breaks something somewhere. Particularly
> compare the json output in stat-display.c and builtin-list.c, my hope
> was that builtin-list.c would be the model for reimplementing the
> stat-display.c one. Others may have different opinions.
> 
> Thanks,
> Ian

Makes sense. If I make pmus.c return a list then anyone can loop over 
the list and do whatever display they want. It removes the need for the 
callback but does require that the consumer know the type of the list 
item. It could be struct sevent or even drop that and directly use 
struct pmu_event_info.

Having the whole list gives you a lot more flexibility than just the 
small window that the print event callback gives you.


Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by James Clark 11 months ago

On 07/03/2025 5:35 pm, Ian Rogers wrote:
> On Fri, Mar 7, 2025 at 6:08 AM James Clark <james.clark@linaro.org> wrote:
>>
>>
>>
>> On 05/03/2025 9:40 pm, Ian Rogers wrote:
>>> On Tue, Mar 4, 2025 at 5:50 AM James Clark <james.clark@linaro.org> wrote:
>>>>
>>>> Instead of showing multiple line items with the same event name and
>>>> description, show a single line and concatenate all PMUs that this
>>>> event can belong to.
>>>>
>>>> Don't do it for json output. Machine readable output doesn't need to be
>>>> minimized, and changing the "Unit" field to a list type would break
>>>> backwards compatibility.
>>>>
>>>> Before:
>>>>    $ perf list -v
>>>>    ...
>>>>    br_indirect_spec
>>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
>>>>    br_indirect_spec
>>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
>>>>
>>>> After:
>>>>
>>>>    $ perf list -v
>>>>    ...
>>>>    br_indirect_spec
>>>>          [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]
>>>>
>>>> Signed-off-by: James Clark <james.clark@linaro.org>
>>>> ---
>>>>    tools/perf/builtin-list.c      |  2 ++
>>>>    tools/perf/util/pmus.c         | 75 +++++++++++++++++++++++++++++++++++++-----
>>>>    tools/perf/util/print-events.h |  1 +
>>>>    3 files changed, 70 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
>>>> index fed482adb039..aacd7beae2a0 100644
>>>> --- a/tools/perf/builtin-list.c
>>>> +++ b/tools/perf/builtin-list.c
>>>> @@ -516,6 +516,7 @@ int cmd_list(int argc, const char **argv)
>>>>                   .print_event = default_print_event,
>>>>                   .print_metric = default_print_metric,
>>>>                   .skip_duplicate_pmus = default_skip_duplicate_pmus,
>>>> +               .collapse_events = true
>>>
>>> So collapse_events is put in the callbacks but isn't a callback. I
>>> think skipping duplicates and collapsing are the same thing, I'd
>>> prefer it if there weren't two terms for the same thing. If you prefer
>>> collapsing as a name then default_skip_duplicate_pmus can be
>>> default_collapse_pmus.
>>>
>>
>> I can use the existing callback and rename it. That does have the effect
>> of disabling this behavior in verbose mode which may be useful or may be
>> unexpected to some people. But it seems pretty 50/50 so fewer callbacks
>> are probably better.
>>
>>>>           };
>>>>           const char *cputype = NULL;
>>>>           const char *unit_name = NULL;
>>>> @@ -574,6 +575,7 @@ int cmd_list(int argc, const char **argv)
>>>>                           .print_event = json_print_event,
>>>>                           .print_metric = json_print_metric,
>>>>                           .skip_duplicate_pmus = json_skip_duplicate_pmus,
>>>> +                       .collapse_events = false
>>>>                   };
>>>>                   ps = &json_ps;
>>>>           } else {
>>>> diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
>>>> index 4d60bac2d2b9..cb1b14ade25b 100644
>>>> --- a/tools/perf/util/pmus.c
>>>> +++ b/tools/perf/util/pmus.c
>>>> @@ -453,17 +453,50 @@ static int cmp_sevent(const void *a, const void *b)
>>>>           /* Order by PMU name. */
>>>>           if (as->pmu == bs->pmu)
>>>>                   return 0;
>>>> -       return strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
>>>> +       ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
>>>> +       if (ret)
>>>> +               return ret;
>>>> +
>>>> +       /* Order by remaining displayed fields for purposes of deduplication later */
>>>> +       ret = strcmp(as->scale_unit ?: "", bs->scale_unit ?: "");
>>>> +       if (ret)
>>>> +               return ret;
>>>> +       ret = !!as->deprecated - !!bs->deprecated;
>>>> +       if (ret)
>>>> +               return ret;
>>>> +       ret = strcmp(as->desc ?: "", bs->desc ?: "");
>>>> +       if (ret)
>>>> +               return ret;
>>>> +       return strcmp(as->long_desc ?: "", bs->long_desc ?: "");
>>>>    }
>>>>
>>>> -static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
>>>> +enum dup_type {
>>>> +       UNIQUE,
>>>> +       DUPLICATE,
>>>> +       SAME_TEXT
>>>> +};
>>>> +
>>>> +static enum dup_type pmu_alias_duplicate_type(struct sevent *a, struct sevent *b)
>>>>    {
>>>>           /* Different names -> never duplicates */
>>>>           if (strcmp(a->name ?: "//", b->name ?: "//"))
>>>> -               return false;
>>>> +               return UNIQUE;
>>>> +
>>>> +       /* Duplicate PMU name and event name -> hide completely */
>>>> +       if (strcmp(a->pmu_name, b->pmu_name) == 0)
>>>> +               return DUPLICATE;
>>>> +
>>>> +       /* Any other different display text -> not duplicate */
>>>> +       if (strcmp(a->topic ?: "", b->topic ?: "") ||
>>>> +           strcmp(a->scale_unit ?: "", b->scale_unit ?: "") ||
>>>> +           a->deprecated != b->deprecated ||
>>>> +           strcmp(a->desc ?: "", b->desc ?: "") ||
>>>> +           strcmp(a->long_desc ?: "", b->long_desc ?: "")) {
>>>> +               return UNIQUE;
>>>> +       }
>>>>
>>>> -       /* Don't remove duplicates for different PMUs */
>>>> -       return strcmp(a->pmu_name, b->pmu_name) == 0;
>>>> +       /* Same display text but different PMU -> collapse */
>>>> +       return SAME_TEXT;
>>>>    }
>>>>
>>>>    struct events_callback_state {
>>>> @@ -501,6 +534,21 @@ static int perf_pmus__print_pmu_events__callback(void *vstate,
>>>>           return 0;
>>>>    }
>>>>
>>>> +static void concat_pmu_names(char *pmu_names, size_t size, const char *a, const char *b)
>>>> +{
>>>> +       size_t len = strlen(pmu_names);
>>>> +       size_t added;
>>>> +
>>>> +       if (len)
>>>> +               added = snprintf(pmu_names + len, size - len, ",%s", b);
>>>> +       else
>>>> +               added = snprintf(pmu_names, size, "%s,%s", a, b);
>>>> +
>>>> +       /* Truncate with ... */
>>>> +       if (added > 0 && added + len >= size)
>>>> +               sprintf(pmu_names + size - 4, "...");
>>>> +}
>>>> +
>>>>    void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
>>>>    {
>>>>           struct perf_pmu *pmu;
>>>> @@ -510,6 +558,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>>>           struct events_callback_state state;
>>>>           bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
>>>>           struct perf_pmu *(*scan_fn)(struct perf_pmu *);
>>>> +       char pmu_names[128] = {0};
>>>>
>>>>           if (skip_duplicate_pmus)
>>>>                   scan_fn = perf_pmus__scan_skip_duplicates;
>>>> @@ -539,12 +588,21 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>>>           qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
>>>>           for (int j = 0; j < len; j++) {
>>>>                   /* Skip duplicates */
>>>> -               if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
>>>> -                       goto free;
>>>> +               if (j < len - 1) {
>>>> +                       enum dup_type dt = pmu_alias_duplicate_type(&aliases[j], &aliases[j + 1]);
>>>> +
>>>> +                       if (dt == DUPLICATE) {
>>>> +                               goto free;
>>>> +                       } else if (print_cb->collapse_events && dt == SAME_TEXT) {
>>>> +                               concat_pmu_names(pmu_names, sizeof(pmu_names),
>>>> +                                                aliases[j].pmu_name, aliases[j+1].pmu_name);
>>>> +                               goto free;
>>>> +                       }
>>>> +               }
>>>
>>> I think a label called 'free' is a little unfortunate given the
>>> function called free.
>>> When I did things with sevent I was bringing over previous `perf list`
>>> code mainly so that the perf list output before and after the changes
>>> was identical. I wonder if this logic would be better placed in the
>>> callback like default_print_event which already maintains state
>>> (last_topic) to optionally print different things. This may better
>>> encapsulate the behavior rather than the logic being in the PMU code.
>>>
>>
>> Yeah I can have a look at putting it in one of the callbacks. But in the
>> end builtin-list.c is the only user of perf_pmus__print_pmu_events()
>> anyway so makes you wonder if the whole thing can be moved to
>> builtin-list and open coded without the callbackyness.
> 
> I wanted to hide some of the innards of pmus, so I think that's the
> reason it is as it is. The whole `sevent` was pre-existing and
> maintained so that the output was the same.
> 
>>>>
>>>>                   print_cb->print_event(print_state,
>>>>                                   aliases[j].topic,
>>>> -                               aliases[j].pmu_name,
>>>> +                               pmu_names[0] ? pmu_names : aliases[j].pmu_name,
>>>>                                   aliases[j].name,
>>>>                                   aliases[j].alias,
>>>>                                   aliases[j].scale_unit,
>>>> @@ -553,6 +611,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
>>>>                                   aliases[j].desc,
>>>>                                   aliases[j].long_desc,
>>>>                                   aliases[j].encoding_desc);
>>>
>>> The encoding_desc will have a PMU with the suffix removed as per
>>> skipping duplicates:
>>> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/pmu.c?h=perf-tools-next#n1954
>>> So I think something like:
>>> ```
>>>     br_mis_pred_retired
>>>          [Instruction architecturally executed,mispredicted branch. Unit:
>>>           armv9_cortex_a510,armv9_cortex_a710]
>>> ```
>>> Would have an encoding of `armv9_cortex_a510/.../` without the a710
>>> encoding being present. That said, I'm not sure anyone cares :-)
>>>
>>> Thanks,
>>> Ian
>>>
>>
>> Ah, in that case I'll disable it for --detailed as well as --json. I
>> could compare encoding_desc in pmu_alias_duplicate_type() but they'll
>> always be different because of the PMU name, so there's no point. And
>> displaying multiple encoding_descs would be fiddly too.
> 
> So I'd like not to have the encoding_desc removed. It is useful for
> debugging.. I meant by people not caring that the format of that
> string is under specified, so the PMU name not being 100% accurate
> likely doesn't affect people.
> 
> Thanks,
> Ian
> 
> 

By 'disabled' I meant disable this collapsing of PMU names, so with 
--detailed you'll get the existing full list of events with their 
encoding_descs, no change there. Sorry for the confusion.

>>>> +               pmu_names[0] = '\0';
>>>>    free:
>>>>                   zfree(&aliases[j].name);
>>>>                   zfree(&aliases[j].alias);
>>>> diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
>>>> index 445efa1636c1..e91f9f830a2a 100644
>>>> --- a/tools/perf/util/print-events.h
>>>> +++ b/tools/perf/util/print-events.h
>>>> @@ -27,6 +27,7 @@ struct print_callbacks {
>>>>                           const char *threshold,
>>>>                           const char *unit);
>>>>           bool (*skip_duplicate_pmus)(void *print_state);
>>>> +       bool collapse_events;
>>>>    };
>>>>
>>>>    /** Print all events, the default when no options are specified. */
>>>>
>>>> --
>>>> 2.34.1
>>>>
>>

Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by Leo Yan 11 months, 1 week ago
On Tue, Mar 04, 2025 at 01:49:14PM +0000, James Clark wrote:
> Instead of showing multiple line items with the same event name and
> description, show a single line and concatenate all PMUs that this
> event can belong to.
> 
> Don't do it for json output. Machine readable output doesn't need to be
> minimized, and changing the "Unit" field to a list type would break
> backwards compatibility.
> 
> Before:
>  $ perf list -v
>  ...
>  br_indirect_spec
>        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
>  br_indirect_spec
>        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
> 
> After:
> 
>  $ perf list -v
>  ...
>  br_indirect_spec
>        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]

From my testing, based on the latest tmp.perf-tools-next branch, I
need to remove the option '-v'.  Otherwise, the '-v' option will
enable long event descriptions, in this case, it does _not_ print Unit
strings at all (see default_print_event() in builtin-list.c).

  default_print_event()
  {
    ...
    if (long_desc && print_state->long_desc) {
            fprintf(fp, "%*s", 8, "[");
            wordwrap(fp, long_desc, 8, pager_get_columns(), 0);
            fprintf(fp, "]\n");
    } else if (desc && print_state->desc) {
    ...
  }

The command `perf list` would be sufficent.

Thanks,
Leo
Re: [PATCH 2/3] perf list: Collapse similar events across PMUs
Posted by Leo Yan 11 months, 1 week ago
On Wed, Mar 05, 2025 at 09:35:45AM +0000, Leo Yan wrote:
> On Tue, Mar 04, 2025 at 01:49:14PM +0000, James Clark wrote:
> > Instead of showing multiple line items with the same event name and
> > description, show a single line and concatenate all PMUs that this
> > event can belong to.
> > 
> > Don't do it for json output. Machine readable output doesn't need to be
> > minimized, and changing the "Unit" field to a list type would break
> > backwards compatibility.
> > 
> > Before:
> >  $ perf list -v
> >  ...
> >  br_indirect_spec
> >        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53]
> >  br_indirect_spec
> >        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a57]
> > 
> > After:
> > 
> >  $ perf list -v
> >  ...
> >  br_indirect_spec
> >        [Branch speculatively executed,indirect branch. Unit: armv8_cortex_a53,armv8_cortex_a57]
> 
> From my testing, based on the latest tmp.perf-tools-next branch, I
> need to remove the option '-v'.  Otherwise, the '-v' option will
> enable long event descriptions, in this case, it does _not_ print Unit
> strings at all (see default_print_event() in builtin-list.c).

Ah, actually I worked on the Linux master branch.  After switched to
tmp.perf-tools-next branch, it works pretty well.

  perf list -v 2>&1 | grep br_mis_pred_retired -A 2
  ...
    br_mis_pred_retired
           [Instruction architecturally executed,mispredicted branch.
           Unit:
                   armv9_cortex_a510,armv9_cortex_a710]

Please ignore my previous comment.  The series looks good to me.

Thanks,
Leo

>   default_print_event()
>   {
>     ...
>     if (long_desc && print_state->long_desc) {
>             fprintf(fp, "%*s", 8, "[");
>             wordwrap(fp, long_desc, 8, pager_get_columns(), 0);
>             fprintf(fp, "]\n");
>     } else if (desc && print_state->desc) {
>     ...
>   }
> 
> The command `perf list` would be sufficent.

> 
> Thanks,
> Leo
>