[PATCH v2] LoongArch: Fix PMU counter allocation for mixed-type event groups

Lisa Robinson posted 1 patch 1 month ago
There is a newer version of this series
arch/loongarch/kernel/perf_event.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
[PATCH v2] LoongArch: Fix PMU counter allocation for mixed-type event groups
Posted by Lisa Robinson 1 month ago
When validating a perf event group, validate_group() unconditionally
attempts to allocate hardware PMU counters for the leader, sibling
events and the new event being added.

This is incorrect for mixed-type groups. If a PERF_TYPE_SOFTWARE event
is part of the group, the current code still tries to allocate a hardware
PMU counter for it, which can wrongly consume hardware PMU resources and
cause spurious allocation failures.

Fix this by only allocating PMU counters for hardware events during group
validation, and skipping software events.

A trimmed down reproducer is as simple as this:

  #include <stdio.h>
  #include <assert.h>
  #include <unistd.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>

  /*
   * Reproducer: open one software event as a group leader, then add four
   * hardware events to that group.  Every perf_event_open() result is
   * checked with assert(), so "PASSED" is printed only if all five opens
   * succeed.
   */
  int
  main (int argc, char *argv[])
  {
  	struct perf_event_attr attr = { 0 };
  	int fds[5];

  	/* Settings shared by all five events; only type/config change below. */
  	attr.disabled = 1;
  	attr.exclude_kernel = 1;
  	attr.exclude_hv = 1;
  	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
  		PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_GROUP;
  	attr.size = sizeof (attr);

  	/*
  	 * fds[0]: software dummy event, opened with group_fd = -1.  Its fd is
  	 * passed as group_fd for every later event, making it the group leader.
  	 */
  	attr.type = PERF_TYPE_SOFTWARE;
  	attr.config = PERF_COUNT_SW_DUMMY;
  	fds[0] = syscall (SYS_perf_event_open, &attr, 0, -1, -1, 0);
  	assert (fds[0] >= 0);

  	/* fds[1]: first hardware sibling (CPU cycles), joined to fds[0]'s group. */
  	attr.type = PERF_TYPE_HARDWARE;
  	attr.config = PERF_COUNT_HW_CPU_CYCLES;
  	fds[1] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
  	assert (fds[1] >= 0);

  	/* fds[2]: second hardware sibling (retired instructions). */
  	attr.type = PERF_TYPE_HARDWARE;
  	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
  	fds[2] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
  	assert (fds[2] >= 0);

  	/* fds[3]: third hardware sibling (branch misses). */
  	attr.type = PERF_TYPE_HARDWARE;
  	attr.config = PERF_COUNT_HW_BRANCH_MISSES;
  	fds[3] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
  	assert (fds[3] >= 0);

  	/*
  	 * fds[4]: fourth hardware sibling (cache references).  The group now
  	 * holds one software and four hardware events, i.e. the mixed-type
  	 * case in which group validation must not allocate a hardware counter
  	 * for the software leader.
  	 */
  	attr.type = PERF_TYPE_HARDWARE;
  	attr.config = PERF_COUNT_HW_CACHE_REFERENCES;
  	fds[4] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
  	assert (fds[4] >= 0);

  	printf ("PASSED\n");

  	/* The fds are not closed explicitly; the process exits right away. */
  	return 0;
  }

Fixes: b37042b2bb7c ("LoongArch: Add perf events support")
Signed-off-by: Lisa Robinson <lisa@bytefly.space>
---
Changes in v2:
- Factor out duplicated perf event type checks into an inline helper.
---
 arch/loongarch/kernel/perf_event.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 9d257c8519c9..e34a6fb33e11 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -626,6 +626,18 @@ static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 conf
 	return pev;
 }
 
+static inline bool loongarch_pmu_event_requires_counter(const struct perf_event *event)
+{
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+	case PERF_TYPE_RAW:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int validate_group(struct perf_event *event)
 {
 	struct cpu_hw_events fake_cpuc;
@@ -633,15 +645,18 @@ static int validate_group(struct perf_event *event)
 
 	memset(&fake_cpuc, 0, sizeof(fake_cpuc));
 
-	if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
+	if (loongarch_pmu_event_requires_counter(leader) &&
+	    loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
 		return -EINVAL;
 
 	for_each_sibling_event(sibling, leader) {
-		if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
+		if (loongarch_pmu_event_requires_counter(sibling) &&
+		    loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
 			return -EINVAL;
 	}
 
-	if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
+	if (loongarch_pmu_event_requires_counter(event) &&
+	    loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
 		return -EINVAL;
 
 	return 0;
-- 
2.52.0
Re: [PATCH v2] LoongArch: Fix PMU counter allocation for mixed-type event groups
Posted by Mi, Dapeng 1 month ago
On 1/5/2026 12:23 AM, Lisa Robinson wrote:
> When validating a perf event group, validate_group() unconditionally
> attempts to allocate hardware PMU counters for the leader, sibling
> events and the new event being added.
>
> This is incorrect for mixed-type groups. If a PERF_TYPE_SOFTWARE event
> ispart of the group, the current code still tries to allocate a hardware

ispart -> "is part"


> PMU counter for it, which can wrongly consume hardware PMU resources and
> cause spurious allocation failures.
>
> Fix this by only allocating PMU counters for hardware events during group
> validation, and skipping software events.
>
> A trimmed down reproducer is as simple as this:
>
>   #include <stdio.h>
>   #include <assert.h>
>   #include <unistd.h>
>   #include <string.h>
>   #include <sys/syscall.h>
>   #include <linux/perf_event.h>
>
>   int
>   main (int argc, char *argv[])
>   {
>   	struct perf_event_attr attr = { 0 };
>   	int fds[5];
>
>   	attr.disabled = 1;
>   	attr.exclude_kernel = 1;
>   	attr.exclude_hv = 1;
>   	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
>   		PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_GROUP;
>   	attr.size = sizeof (attr);
>
>   	attr.type = PERF_TYPE_SOFTWARE;
>   	attr.config = PERF_COUNT_SW_DUMMY;
>   	fds[0] = syscall (SYS_perf_event_open, &attr, 0, -1, -1, 0);
>   	assert (fds[0] >= 0);
>
>   	attr.type = PERF_TYPE_HARDWARE;
>   	attr.config = PERF_COUNT_HW_CPU_CYCLES;
>   	fds[1] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
>   	assert (fds[1] >= 0);
>
>   	attr.type = PERF_TYPE_HARDWARE;
>   	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
>   	fds[2] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
>   	assert (fds[2] >= 0);
>
>   	attr.type = PERF_TYPE_HARDWARE;
>   	attr.config = PERF_COUNT_HW_BRANCH_MISSES;
>   	fds[3] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
>   	assert (fds[3] >= 0);
>
>   	attr.type = PERF_TYPE_HARDWARE;
>   	attr.config = PERF_COUNT_HW_CACHE_REFERENCES;
>   	fds[4] = syscall (SYS_perf_event_open, &attr, 0, -1, fds[0], 0);
>   	assert (fds[4] >= 0);
>
>   	printf ("PASSED\n");
>
>   	return 0;
>   }
>
> Fixes: b37042b2bb7c ("LoongArch: Add perf events support")
> Signed-off-by: Lisa Robinson <lisa@bytefly.space>
> ---
> Changes in v2:
> - Factor out duplicated perf event type checks into an inline helper.
> ---
>  arch/loongarch/kernel/perf_event.c | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
> index 9d257c8519c9..e34a6fb33e11 100644
> --- a/arch/loongarch/kernel/perf_event.c
> +++ b/arch/loongarch/kernel/perf_event.c
> @@ -626,6 +626,18 @@ static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 conf
>  	return pev;
>  }
>  
> +static inline bool loongarch_pmu_event_requires_counter(const struct perf_event *event)
> +{
> +	switch (event->attr.type) {
> +	case PERF_TYPE_HARDWARE:
> +	case PERF_TYPE_HW_CACHE:
> +	case PERF_TYPE_RAW:
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +
>  static int validate_group(struct perf_event *event)
>  {
>  	struct cpu_hw_events fake_cpuc;
> @@ -633,15 +645,18 @@ static int validate_group(struct perf_event *event)
>  
>  	memset(&fake_cpuc, 0, sizeof(fake_cpuc));
>  
> -	if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
> +	if (loongarch_pmu_event_requires_counter(leader) &&
> +	    loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
>  		return -EINVAL;
>  
>  	for_each_sibling_event(sibling, leader) {
> -		if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
> +		if (loongarch_pmu_event_requires_counter(sibling) &&
> +		    loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
>  			return -EINVAL;
>  	}
>  
> -	if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
> +	if (loongarch_pmu_event_requires_counter(event) &&
> +	    loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
>  		return -EINVAL;
>  
>  	return 0;

The code looks good to me, but I'm not very familiar with the LoongArch perf
code, so I won't give a Reviewed-by and will leave it to the LoongArch perf
experts. Thanks.