[Patch v3 2/5] perf x86/topdown: Correct leader selection with sample_read enabled

Dapeng Mi posted 5 patches 1 year, 5 months ago
There is a newer version of this series
[Patch v3 2/5] perf x86/topdown: Correct leader selection with sample_read enabled
Posted by Dapeng Mi 1 year, 5 months ago
Addresses an issue where, in the absence of a topdown metrics event
within a sampling group, the slots event was incorrectly bypassed as
the sampling leader when sample_read was enabled.

perf record -e '{slots,branches}:S' -c 10000 -vv sleep 1

In this case, the slots event should be sampled as leader but the
branches event is sampled in fact like the verbose output shows.

perf_event_attr:
  type                             4 (cpu)
  size                             168
  config                           0x400 (slots)
  sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
  read_format                      ID|GROUP|LOST
  disabled                         1
  sample_id_all                    1
  exclude_guest                    1
------------------------------------------------------------
sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
------------------------------------------------------------
perf_event_attr:
  type                             0 (PERF_TYPE_HARDWARE)
  size                             168
  config                           0x4 (PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
  { sample_period, sample_freq }   10000
  sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
  read_format                      ID|GROUP|LOST
  sample_id_all                    1
  exclude_guest                    1

The sample period of slots event instead of branches event is reset to
0.

This fix ensures the slots event remains the leader under these
conditions.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 tools/perf/arch/x86/util/topdown.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 49f25d67ed77..857e00cf579f 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "api/fs/fs.h"
 #include "util/evsel.h"
+#include "util/evlist.h"
 #include "util/pmu.h"
 #include "util/pmus.h"
 #include "util/topdown.h"
@@ -87,11 +88,22 @@ bool arch_is_topdown_metrics(const struct evsel *evsel)
  */
 bool arch_topdown_sample_read(struct evsel *leader)
 {
+	struct evsel *evsel;
+
 	if (!evsel__sys_has_perf_metrics(leader))
 		return false;
 
-	if (arch_is_topdown_slots(leader))
-		return true;
+	if (!arch_is_topdown_slots(leader))
+		return false;
+
+	/*
+	 * If slots event as leader event but no topdown metric events
+	 * in group, slots event should still sample as leader.
+	 */
+	evlist__for_each_entry(leader->evlist, evsel) {
+		if (evsel != leader && arch_is_topdown_metrics(evsel))
+			return true;
+	}
 
 	return false;
 }
-- 
2.40.1
Re: [Patch v3 2/5] perf x86/topdown: Correct leader selection with sample_read enabled
Posted by Liang, Kan 1 year, 4 months ago

On 2024-07-12 1:03 p.m., Dapeng Mi wrote:
> Addresses an issue where, in the absence of a topdown metrics event
> within a sampling group, the slots event was incorrectly bypassed as
> the sampling leader when sample_read was enabled.
> 
> perf record -e '{slots,branches}:S' -c 10000 -vv sleep 1
> 
> In this case, the slots event should be sampled as leader but the
> branches event is sampled in fact like the verbose output shows.
> 
> perf_event_attr:
>   type                             4 (cpu)
>   size                             168
>   config                           0x400 (slots)
>   sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
>   read_format                      ID|GROUP|LOST
>   disabled                         1
>   sample_id_all                    1
>   exclude_guest                    1
> ------------------------------------------------------------
> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
> ------------------------------------------------------------
> perf_event_attr:
>   type                             0 (PERF_TYPE_HARDWARE)
>   size                             168
>   config                           0x4 (PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
>   { sample_period, sample_freq }   10000
>   sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
>   read_format                      ID|GROUP|LOST
>   sample_id_all                    1
>   exclude_guest                    1
> 
> The sample period of slots event instead of branches event is reset to
> 0.
> 
> This fix ensures the slots event remains the leader under these
> conditions.
> 
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> ---
>  tools/perf/arch/x86/util/topdown.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
> index 49f25d67ed77..857e00cf579f 100644
> --- a/tools/perf/arch/x86/util/topdown.c
> +++ b/tools/perf/arch/x86/util/topdown.c
> @@ -1,6 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include "api/fs/fs.h"
>  #include "util/evsel.h"
> +#include "util/evlist.h"
>  #include "util/pmu.h"
>  #include "util/pmus.h"
>  #include "util/topdown.h"
> @@ -87,11 +88,22 @@ bool arch_is_topdown_metrics(const struct evsel *evsel)
>   */
>  bool arch_topdown_sample_read(struct evsel *leader)
>  {
> +	struct evsel *evsel;
> +
>  	if (!evsel__sys_has_perf_metrics(leader))
>  		return false;
>  
> -	if (arch_is_topdown_slots(leader))
> -		return true;
> +	if (!arch_is_topdown_slots(leader))
> +		return false;
> +
> +	/*
> +	 * If slots event as leader event but no topdown metric events
> +	 * in group, slots event should still sample as leader.
> +	 */
> +	evlist__for_each_entry(leader->evlist, evsel) {
		evsel = leader->leader;
		evlist__for_each_entry_continue(leader->evlist, evsel)
			if (evsel->leader != leader->leader)
				return false;

Maybe we should limit the check in a group, rather than the entire
evlist. Something as above (not tested)?

Thanks,
Kan
> +		if (evsel != leader && arch_is_topdown_metrics(evsel))
> +			return true;
> +	}
>  
>  	return false;
>  }
Re: [Patch v3 2/5] perf x86/topdown: Correct leader selection with sample_read enabled
Posted by Mi, Dapeng 1 year, 4 months ago
On 8/12/2024 11:18 PM, Liang, Kan wrote:
>
> On 2024-07-12 1:03 p.m., Dapeng Mi wrote:
>> Addresses an issue where, in the absence of a topdown metrics event
>> within a sampling group, the slots event was incorrectly bypassed as
>> the sampling leader when sample_read was enabled.
>>
>> perf record -e '{slots,branches}:S' -c 10000 -vv sleep 1
>>
>> In this case, the slots event should be sampled as leader but the
>> branches event is sampled in fact like the verbose output shows.
>>
>> perf_event_attr:
>>   type                             4 (cpu)
>>   size                             168
>>   config                           0x400 (slots)
>>   sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
>>   read_format                      ID|GROUP|LOST
>>   disabled                         1
>>   sample_id_all                    1
>>   exclude_guest                    1
>> ------------------------------------------------------------
>> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
>> ------------------------------------------------------------
>> perf_event_attr:
>>   type                             0 (PERF_TYPE_HARDWARE)
>>   size                             168
>>   config                           0x4 (PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
>>   { sample_period, sample_freq }   10000
>>   sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
>>   read_format                      ID|GROUP|LOST
>>   sample_id_all                    1
>>   exclude_guest                    1
>>
>> The sample period of slots event instead of branches event is reset to
>> 0.
>>
>> This fix ensures the slots event remains the leader under these
>> conditions.
>>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>> ---
>>  tools/perf/arch/x86/util/topdown.c | 16 ++++++++++++++--
>>  1 file changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
>> index 49f25d67ed77..857e00cf579f 100644
>> --- a/tools/perf/arch/x86/util/topdown.c
>> +++ b/tools/perf/arch/x86/util/topdown.c
>> @@ -1,6 +1,7 @@
>>  // SPDX-License-Identifier: GPL-2.0
>>  #include "api/fs/fs.h"
>>  #include "util/evsel.h"
>> +#include "util/evlist.h"
>>  #include "util/pmu.h"
>>  #include "util/pmus.h"
>>  #include "util/topdown.h"
>> @@ -87,11 +88,22 @@ bool arch_is_topdown_metrics(const struct evsel *evsel)
>>   */
>>  bool arch_topdown_sample_read(struct evsel *leader)
>>  {
>> +	struct evsel *evsel;
>> +
>>  	if (!evsel__sys_has_perf_metrics(leader))
>>  		return false;
>>  
>> -	if (arch_is_topdown_slots(leader))
>> -		return true;
>> +	if (!arch_is_topdown_slots(leader))
>> +		return false;
>> +
>> +	/*
>> +	 * If slots event as leader event but no topdown metric events
>> +	 * in group, slots event should still sample as leader.
>> +	 */
>> +	evlist__for_each_entry(leader->evlist, evsel) {
> 		evsel = leader->leader;
> 		evlist__for_each_entry_continue(leader->evlist, evsel)
> 			if (evsel->leader != leader->leader)
> 				return false;
>
> Maybe we should limit the check in a group, rather than the entire
> evlist. Something as above (not tested)?

Good point. Would limit the check in the group.


>
> Thanks,
> Kan
>> +		if (evsel != leader && arch_is_topdown_metrics(evsel))
>> +			return true;
>> +	}
>>  
>>  	return false;
>>  }
Re: [Patch v3 2/5] perf x86/topdown: Correct leader selection with sample_read enabled
Posted by Arnaldo Carvalho de Melo 1 year, 4 months ago
On Fri, Jul 12, 2024 at 05:03:36PM +0000, Dapeng Mi wrote:
> Addresses an issue where, in the absence of a topdown metrics event
> within a sampling group, the slots event was incorrectly bypassed as
> the sampling leader when sample_read was enabled.
> 
> perf record -e '{slots,branches}:S' -c 10000 -vv sleep 1

Kan, can you please take a look at this patch and the others in this
series?

Thanks,

- Arnaldo
 
> In this case, the slots event should be sampled as leader but the
> branches event is sampled in fact like the verbose output shows.
> 
> perf_event_attr:
>   type                             4 (cpu)
>   size                             168
>   config                           0x400 (slots)
>   sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
>   read_format                      ID|GROUP|LOST
>   disabled                         1
>   sample_id_all                    1
>   exclude_guest                    1
> ------------------------------------------------------------
> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
> ------------------------------------------------------------
> perf_event_attr:
>   type                             0 (PERF_TYPE_HARDWARE)
>   size                             168
>   config                           0x4 (PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
>   { sample_period, sample_freq }   10000
>   sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
>   read_format                      ID|GROUP|LOST
>   sample_id_all                    1
>   exclude_guest                    1
> 
> The sample period of slots event instead of branches event is reset to
> 0.
> 
> This fix ensures the slots event remains the leader under these
> conditions.
> 
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> ---
>  tools/perf/arch/x86/util/topdown.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
> index 49f25d67ed77..857e00cf579f 100644
> --- a/tools/perf/arch/x86/util/topdown.c
> +++ b/tools/perf/arch/x86/util/topdown.c
> @@ -1,6 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include "api/fs/fs.h"
>  #include "util/evsel.h"
> +#include "util/evlist.h"
>  #include "util/pmu.h"
>  #include "util/pmus.h"
>  #include "util/topdown.h"
> @@ -87,11 +88,22 @@ bool arch_is_topdown_metrics(const struct evsel *evsel)
>   */
>  bool arch_topdown_sample_read(struct evsel *leader)
>  {
> +	struct evsel *evsel;
> +
>  	if (!evsel__sys_has_perf_metrics(leader))
>  		return false;
>  
> -	if (arch_is_topdown_slots(leader))
> -		return true;
> +	if (!arch_is_topdown_slots(leader))
> +		return false;
> +
> +	/*
> +	 * If slots event as leader event but no topdown metric events
> +	 * in group, slots event should still sample as leader.
> +	 */
> +	evlist__for_each_entry(leader->evlist, evsel) {
> +		if (evsel != leader && arch_is_topdown_metrics(evsel))
> +			return true;
> +	}
>  
>  	return false;
>  }
> -- 
> 2.40.1
Re: [Patch v3 2/5] perf x86/topdown: Correct leader selection with sample_read enabled
Posted by Liang, Kan 1 year, 4 months ago

On 2024-08-12 9:42 a.m., Arnaldo Carvalho de Melo wrote:
> On Fri, Jul 12, 2024 at 05:03:36PM +0000, Dapeng Mi wrote:
>> Addresses an issue where, in the absence of a topdown metrics event
>> within a sampling group, the slots event was incorrectly bypassed as
>> the sampling leader when sample_read was enabled.
>>
>> perf record -e '{slots,branches}:S' -c 10000 -vv sleep 1
> 
> Kan, can you please take a look at this patch and the others in this
> series?

Sure.

Thanks,
Kan

> 
> Thanks,
> 
> - Arnaldo
>  
>> In this case, the slots event should be sampled as leader but the
>> branches event is sampled in fact like the verbose output shows.
>>
>> perf_event_attr:
>>   type                             4 (cpu)
>>   size                             168
>>   config                           0x400 (slots)
>>   sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
>>   read_format                      ID|GROUP|LOST
>>   disabled                         1
>>   sample_id_all                    1
>>   exclude_guest                    1
>> ------------------------------------------------------------
>> sys_perf_event_open: pid -1  cpu 0  group_fd -1  flags 0x8 = 5
>> ------------------------------------------------------------
>> perf_event_attr:
>>   type                             0 (PERF_TYPE_HARDWARE)
>>   size                             168
>>   config                           0x4 (PERF_COUNT_HW_BRANCH_INSTRUCTIONS)
>>   { sample_period, sample_freq }   10000
>>   sample_type                      IP|TID|TIME|READ|CPU|IDENTIFIER
>>   read_format                      ID|GROUP|LOST
>>   sample_id_all                    1
>>   exclude_guest                    1
>>
>> The sample period of slots event instead of branches event is reset to
>> 0.
>>
>> This fix ensures the slots event remains the leader under these
>> conditions.
>>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>> ---
>>  tools/perf/arch/x86/util/topdown.c | 16 ++++++++++++++--
>>  1 file changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
>> index 49f25d67ed77..857e00cf579f 100644
>> --- a/tools/perf/arch/x86/util/topdown.c
>> +++ b/tools/perf/arch/x86/util/topdown.c
>> @@ -1,6 +1,7 @@
>>  // SPDX-License-Identifier: GPL-2.0
>>  #include "api/fs/fs.h"
>>  #include "util/evsel.h"
>> +#include "util/evlist.h"
>>  #include "util/pmu.h"
>>  #include "util/pmus.h"
>>  #include "util/topdown.h"
>> @@ -87,11 +88,22 @@ bool arch_is_topdown_metrics(const struct evsel *evsel)
>>   */
>>  bool arch_topdown_sample_read(struct evsel *leader)
>>  {
>> +	struct evsel *evsel;
>> +
>>  	if (!evsel__sys_has_perf_metrics(leader))
>>  		return false;
>>  
>> -	if (arch_is_topdown_slots(leader))
>> -		return true;
>> +	if (!arch_is_topdown_slots(leader))
>> +		return false;
>> +
>> +	/*
>> +	 * If slots event as leader event but no topdown metric events
>> +	 * in group, slots event should still sample as leader.
>> +	 */
>> +	evlist__for_each_entry(leader->evlist, evsel) {
>> +		if (evsel != leader && arch_is_topdown_metrics(evsel))
>> +			return true;
>> +	}
>>  
>>  	return false;
>>  }
>> -- 
>> 2.40.1
>