[PATCH v1] perf parse-events: Make X modifier more respectful of groups

Ian Rogers posted 1 patch 3 months, 3 weeks ago
There is a newer version of this series
tools/perf/util/parse-events.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
[PATCH v1] perf parse-events: Make X modifier more respectful of groups
Posted by Ian Rogers 3 months, 3 weeks ago
Events with an X modifier were reordered within a group; for example,
slots was made the leader in:
```
$ perf record -e '{cpu/mem-stores/ppu,cpu/slots/uX}' -- sleep 1
```

Fix by making `dont_regroup` evsels always use their own index for
sorting. When fixing the groups, set cur_leader to the leader of the
`dont_regroup` evsel rather than to the evsel itself, so that the
`dont_regroup` evsel doesn't become a leader.

On a Tiger Lake machine this patch corrects the behavior and meets expectations in:
```
$ perf stat -e '{cpu/mem-stores/,cpu/slots/uX}' -a -- sleep 0.1

 Performance counter stats for 'system wide':

        83,458,652      cpu/mem-stores/
     2,720,854,880      cpu/slots/uX

       0.103780587 seconds time elapsed

$ perf stat -e 'slots,slots:X' -a -- sleep 0.1

 Performance counter stats for 'system wide':

       732,042,247      slots                (48.96%)
       643,288,155      slots:X              (51.04%)

       0.102731018 seconds time elapsed
```

Closes: https://lore.kernel.org/lkml/18f20d38-070c-4e17-bc90-cf7102e1e53d@linux.intel.com/
Fixes: 035c17893082 ("perf parse-events: Add 'X' modifier to exclude an event from being regrouped")
Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/parse-events.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3aec86aebdc6..1a5da93f4094 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1973,14 +1973,18 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
 	 * event's index is used. An index may be forced for events that
 	 * must be in the same group, namely Intel topdown events.
 	 */
-	if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) {
+	if (lhs->dont_regroup) {
+		lhs_sort_idx = lhs_core->idx;
+	} else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) {
 		lhs_sort_idx = *force_grouped_idx;
 	} else {
 		bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1;
 
 		lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx;
 	}
-	if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) {
+	if (rhs->dont_regroup) {
+		rhs_sort_idx = rhs_core->idx;
+	} else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) {
 		rhs_sort_idx = *force_grouped_idx;
 	} else {
 		bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1;
@@ -2078,10 +2082,10 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
 	 */
 	idx = 0;
 	list_for_each_entry(pos, list, core.node) {
-		const struct evsel *pos_leader = evsel__leader(pos);
+		struct evsel *pos_leader = evsel__leader(pos);
 		const char *pos_pmu_name = pos->group_pmu_name;
 		const char *cur_leader_pmu_name;
-		bool pos_force_grouped = force_grouped_idx != -1 &&
+		bool pos_force_grouped = force_grouped_idx != -1 && !pos->dont_regroup &&
 			arch_evsel__must_be_in_group(pos);
 
 		/* Reset index and nr_members. */
@@ -2095,8 +2099,8 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
 		 * groups can't span PMUs.
 		 */
 		if (!cur_leader || pos->dont_regroup) {
-			cur_leader = pos;
-			cur_leaders_grp = &pos->core;
+			cur_leader = pos_leader;
+			cur_leaders_grp = &cur_leader->core;
 			if (pos_force_grouped)
 				force_grouped_leader = pos;
 		}
-- 
2.51.0.858.gf9c4a03a3a-goog
Re: [PATCH v1] perf parse-events: Make X modifier more respectful of groups
Posted by Falcon, Thomas 3 months, 3 weeks ago
On Fri, 2025-10-17 at 09:57 -0700, Ian Rogers wrote:
> Events with an X modifier were reordered within a group, for example
> slots was made the leader in:
> ```
> $ perf record -e '{cpu/mem-stores/ppu,cpu/slots/uX}' -- sleep 1
> ```
> 
> Fix by making `dont_regroup` evsels always use their index for
> sorting. Make the cur_leader, when fixing the groups, be that of
> `dont_regroup` evsel so that the `dont_regroup` evsel doesn't become
> a
> leader.
> 
> On a tigerlake this patch corrects this and meets expectations in:
> ```
> $ perf stat -e '{cpu/mem-stores/,cpu/slots/uX}' -a -- sleep 0.1
> 
>  Performance counter stats for 'system wide':
> 
>         83,458,652      cpu/mem-stores/
>      2,720,854,880      cpu/slots/uX
> 
>        0.103780587 seconds time elapsed
> 
> $ perf stat -e 'slots,slots:X' -a -- sleep 0.1
> 
>  Performance counter stats for 'system wide':
> 
>        732,042,247      slots                (48.96%)
>        643,288,155      slots:X              (51.04%)
> 
>        0.102731018 seconds time elapsed
> ```
> 

Hi Ian, just confirming the results on an Alder Lake:

% sudo ./perf stat -e '{cpu_core/mem-stores/,cpu_core/slots/uX}' -a -- sleep 0.1

 Performance counter stats for 'system wide':

        23,097,346      cpu_core/mem-stores/
       666,910,974      cpu_core/slots/uX                 

       0.101369982 seconds time elapsed

 % sudo ./perf stat -e 'slots,slots:X' -a -- sleep 0.1  

 Performance counter stats for 'system wide':

     1,775,555,180      cpu_core/slots/                  (50.08%)
     1,910,073,872      cpu_core/slots/X                 (49.92%)

       0.101368283 seconds time elapsed

Tested-by: Thomas Falcon <thomas.falcon@intel.com>

Thanks,
Tom

> Closes:
> https://lore.kernel.org/lkml/18f20d38-070c-4e17-bc90-cf7102e1e53d@linux.intel.com/
> Fixes: 035c17893082 ("perf parse-events: Add 'X' modifier to exclude
> an event from being regrouped")
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/util/parse-events.c | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-
> events.c
> index 3aec86aebdc6..1a5da93f4094 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -1973,14 +1973,18 @@ static int evlist__cmp(void *_fg_idx, const
> struct list_head *l, const struct li
>  	 * event's index is used. An index may be forced for events
> that
>  	 * must be in the same group, namely Intel topdown events.
>  	 */
> -	if (*force_grouped_idx != -1 &&
> arch_evsel__must_be_in_group(lhs)) {
> +	if (lhs->dont_regroup) {
> +		lhs_sort_idx = lhs_core->idx;
> +	} else if (*force_grouped_idx != -1 &&
> arch_evsel__must_be_in_group(lhs)) {
>  		lhs_sort_idx = *force_grouped_idx;
>  	} else {
>  		bool lhs_has_group = lhs_core->leader != lhs_core ||
> lhs_core->nr_members > 1;
>  
>  		lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx
> : lhs_core->idx;
>  	}
> -	if (*force_grouped_idx != -1 &&
> arch_evsel__must_be_in_group(rhs)) {
> +	if (rhs->dont_regroup) {
> +		rhs_sort_idx = rhs_core->idx;
> +	} else if (*force_grouped_idx != -1 &&
> arch_evsel__must_be_in_group(rhs)) {
>  		rhs_sort_idx = *force_grouped_idx;
>  	} else {
>  		bool rhs_has_group = rhs_core->leader != rhs_core ||
> rhs_core->nr_members > 1;
> @@ -2078,10 +2082,10 @@ static int
> parse_events__sort_events_and_fix_groups(struct list_head *list)
>  	 */
>  	idx = 0;
>  	list_for_each_entry(pos, list, core.node) {
> -		const struct evsel *pos_leader = evsel__leader(pos);
> +		struct evsel *pos_leader = evsel__leader(pos);
>  		const char *pos_pmu_name = pos->group_pmu_name;
>  		const char *cur_leader_pmu_name;
> -		bool pos_force_grouped = force_grouped_idx != -1 &&
> +		bool pos_force_grouped = force_grouped_idx != -1 &&
> !pos->dont_regroup &&
>  			arch_evsel__must_be_in_group(pos);
>  
>  		/* Reset index and nr_members. */
> @@ -2095,8 +2099,8 @@ static int
> parse_events__sort_events_and_fix_groups(struct list_head *list)
>  		 * groups can't span PMUs.
>  		 */
>  		if (!cur_leader || pos->dont_regroup) {
> -			cur_leader = pos;
> -			cur_leaders_grp = &pos->core;
> +			cur_leader = pos_leader;
> +			cur_leaders_grp = &cur_leader->core;
>  			if (pos_force_grouped)
>  				force_grouped_leader = pos;
>  		}

Re: [PATCH v1] perf parse-events: Make X modifier more respectful of groups
Posted by Ian Rogers 3 months, 3 weeks ago
On Fri, Oct 17, 2025 at 10:55 AM Falcon, Thomas <thomas.falcon@intel.com> wrote:
>
> On Fri, 2025-10-17 at 09:57 -0700, Ian Rogers wrote:
> > Events with an X modifier were reordered within a group, for example
> > slots was made the leader in:
> > ```
> > $ perf record -e '{cpu/mem-stores/ppu,cpu/slots/uX}' -- sleep 1
> > ```
> >
> > Fix by making `dont_regroup` evsels always use their index for
> > sorting. Make the cur_leader, when fixing the groups, be that of
> > `dont_regroup` evsel so that the `dont_regroup` evsel doesn't become
> > a
> > leader.
> >
> > On a tigerlake this patch corrects this and meets expectations in:
> > ```
> > $ perf stat -e '{cpu/mem-stores/,cpu/slots/uX}' -a -- sleep 0.1
> >
> >  Performance counter stats for 'system wide':
> >
> >         83,458,652      cpu/mem-stores/
> >      2,720,854,880      cpu/slots/uX
> >
> >        0.103780587 seconds time elapsed
> >
> > $ perf stat -e 'slots,slots:X' -a -- sleep 0.1
> >
> >  Performance counter stats for 'system wide':
> >
> >        732,042,247      slots                (48.96%)
> >        643,288,155      slots:X              (51.04%)
> >
> >        0.102731018 seconds time elapsed
> > ```
> >
>
> Hi Ian, just confirming the results on an Alder Lake:
>
> % sudo ./perf stat -e '{cpu_core/mem-stores/,cpu_core/slots/uX}' -a --
> sleep 0.1
>
>  Performance counter stats for 'system wide':
>
>         23,097,346      cpu_core/mem-stores/
>        666,910,974      cpu_core/slots/uX
>
>        0.101369982 seconds time elapsed
>
>  % sudo ./perf stat -e 'slots,slots:X' -a -- sleep 0.1
>
>  Performance counter stats for 'system wide':
>
>      1,775,555,180      cpu_core/slots/
> (50.08%)
>      1,910,073,872      cpu_core/slots/X
> (49.92%)
>
>        0.101368283 seconds time elapsed
>
> Tested-by: Thomas Falcon <thomas.falcon@intel.com>

I found an issue when the slots event is injected and will look at it in v2:
```
$ perf stat -v -e 'topdown-retiring' -a -- sleep 0.1
Using CPUID GenuineIntel-6-8D-1
topdown-retiring -> cpu/topdown-retiring/
WARNING: events were regrouped to match PMUs
evlist after sorting/fixing: '{slots,topdown-retiring}'
Control descriptor is not initialized
perf: util/evsel.c:2028: get_group_fd: Assertion `!(!leader->core.fd)' failed.
Aborted
```

Thanks,
Ian

> Thanks,
> Tom
>
> > Closes:
> > https://lore.kernel.org/lkml/18f20d38-070c-4e17-bc90-cf7102e1e53d@linux.intel.com/
> > Fixes: 035c17893082 ("perf parse-events: Add 'X' modifier to exclude
> > an event from being regrouped")
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/util/parse-events.c | 16 ++++++++++------
> >  1 file changed, 10 insertions(+), 6 deletions(-)
> >
> > diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-
> > events.c
> > index 3aec86aebdc6..1a5da93f4094 100644
> > --- a/tools/perf/util/parse-events.c
> > +++ b/tools/perf/util/parse-events.c
> > @@ -1973,14 +1973,18 @@ static int evlist__cmp(void *_fg_idx, const
> > struct list_head *l, const struct li
> >        * event's index is used. An index may be forced for events
> > that
> >        * must be in the same group, namely Intel topdown events.
> >        */
> > -     if (*force_grouped_idx != -1 &&
> > arch_evsel__must_be_in_group(lhs)) {
> > +     if (lhs->dont_regroup) {
> > +             lhs_sort_idx = lhs_core->idx;
> > +     } else if (*force_grouped_idx != -1 &&
> > arch_evsel__must_be_in_group(lhs)) {
> >               lhs_sort_idx = *force_grouped_idx;
> >       } else {
> >               bool lhs_has_group = lhs_core->leader != lhs_core ||
> > lhs_core->nr_members > 1;
> >
> >               lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx
> > : lhs_core->idx;
> >       }
> > -     if (*force_grouped_idx != -1 &&
> > arch_evsel__must_be_in_group(rhs)) {
> > +     if (rhs->dont_regroup) {
> > +             rhs_sort_idx = rhs_core->idx;
> > +     } else if (*force_grouped_idx != -1 &&
> > arch_evsel__must_be_in_group(rhs)) {
> >               rhs_sort_idx = *force_grouped_idx;
> >       } else {
> >               bool rhs_has_group = rhs_core->leader != rhs_core ||
> > rhs_core->nr_members > 1;
> > @@ -2078,10 +2082,10 @@ static int
> > parse_events__sort_events_and_fix_groups(struct list_head *list)
> >        */
> >       idx = 0;
> >       list_for_each_entry(pos, list, core.node) {
> > -             const struct evsel *pos_leader = evsel__leader(pos);
> > +             struct evsel *pos_leader = evsel__leader(pos);
> >               const char *pos_pmu_name = pos->group_pmu_name;
> >               const char *cur_leader_pmu_name;
> > -             bool pos_force_grouped = force_grouped_idx != -1 &&
> > +             bool pos_force_grouped = force_grouped_idx != -1 &&
> > !pos->dont_regroup &&
> >                       arch_evsel__must_be_in_group(pos);
> >
> >               /* Reset index and nr_members. */
> > @@ -2095,8 +2099,8 @@ static int
> > parse_events__sort_events_and_fix_groups(struct list_head *list)
> >                * groups can't span PMUs.
> >                */
> >               if (!cur_leader || pos->dont_regroup) {
> > -                     cur_leader = pos;
> > -                     cur_leaders_grp = &pos->core;
> > +                     cur_leader = pos_leader;
> > +                     cur_leaders_grp = &cur_leader->core;
> >                       if (pos_force_grouped)
> >                               force_grouped_leader = pos;
> >               }
>