[RESEND PATCH 01/12] perf/x86/intel: Support the PEBS event mask
Posted by kan.liang@linux.intel.com 1 year, 6 months ago
From: Kan Liang <kan.liang@linux.intel.com>

The current perf code assumes that the counters which support PEBS are
contiguous. That is no longer guaranteed with the newly introduced
CPUID leaf 0x23, which enumerates the counters with a counter mask.
There may be holes in the counter mask on future platforms or in a
virtualization environment.

Store the PEBS event mask rather than the maximum number of PEBS
counters in the x86 PMU structures.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
 arch/x86/events/intel/core.c    |  8 ++++----
 arch/x86/events/intel/ds.c      | 14 +++++++-------
 arch/x86/events/perf_event.h    |  4 ++--
 arch/x86/include/asm/intel_ds.h |  6 ++++++
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 38c1b1f1deaa..c27a9f75defb 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4728,7 +4728,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
 {
 	intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
 				     &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
-	pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+	pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
 	pmu->unconstrained = (struct event_constraint)
 			     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
 						0, pmu->num_counters, 0, 0);
@@ -6070,7 +6070,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
 
 		pmu->num_counters = x86_pmu.num_counters;
 		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
-		pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
 		pmu->unconstrained = (struct event_constraint)
 				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
 							0, pmu->num_counters, 0, 0);
@@ -6193,7 +6193,7 @@ __init int intel_pmu_init(void)
 	x86_pmu.events_maskl		= ebx.full;
 	x86_pmu.events_mask_len		= eax.split.mask_length;
 
-	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(GENMASK_ULL(x86_pmu.num_counters - 1, 0));
 	x86_pmu.pebs_capable		= PEBS_COUNTER_MASK;
 
 	/*
@@ -6822,7 +6822,7 @@ __init int intel_pmu_init(void)
 			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
 		}
 
-		pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
+		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
 		pmu->unconstrained = (struct event_constraint)
 					__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
 							   0, pmu->num_counters, 0, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index e010bfed8417..a0104c82baed 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1137,7 +1137,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sche
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
-	int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
+	int max_pebs_events = hweight64(hybrid(cpuc->pmu, pebs_events_mask));
 	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
 	u64 threshold;
 	int reserved;
@@ -2157,6 +2157,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 	void *base, *at, *top;
 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
 	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+	int max_pebs_events = hweight64(x86_pmu.pebs_events_mask);
 	int bit, i, size;
 	u64 mask;
 
@@ -2168,8 +2169,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
-	size = x86_pmu.max_pebs_events;
+	mask = x86_pmu.pebs_events_mask;
+	size = max_pebs_events;
 	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
 		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
 		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
@@ -2208,8 +2209,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 			pebs_status = p->status = cpuc->pebs_enabled;
 
 		bit = find_first_bit((unsigned long *)&pebs_status,
-					x86_pmu.max_pebs_events);
-		if (bit >= x86_pmu.max_pebs_events)
+					max_pebs_events);
+		if (bit >= max_pebs_events)
 			continue;
 
 		/*
@@ -2267,7 +2268,6 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 {
 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
 	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event;
@@ -2283,7 +2283,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	mask = ((1ULL << max_pebs_events) - 1) |
+	mask = hybrid(cpuc->pmu, pebs_events_mask) |
 	       (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
 	size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 72b022a1e16c..880fe0c4aa68 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -684,7 +684,7 @@ struct x86_hybrid_pmu {
 	cpumask_t			supported_cpus;
 	union perf_capabilities		intel_cap;
 	u64				intel_ctrl;
-	int				max_pebs_events;
+	u64				pebs_events_mask;
 	int				num_counters;
 	int				num_counters_fixed;
 	struct event_constraint		unconstrained;
@@ -852,7 +852,7 @@ struct x86_pmu {
 			pebs_ept		:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
-	int		max_pebs_events;
+	u64		pebs_events_mask;
 	void		(*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 2f9eeb5c3069..d11f0f480ccb 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -9,6 +9,7 @@
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4	8
 #define MAX_PEBS_EVENTS		32
+#define MAX_PEBS_EVENTS_MASK	GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
 #define MAX_FIXED_PEBS_EVENTS	16
 
 /*
@@ -35,4 +36,9 @@ struct debug_store_buffers {
 	char	pebs_buffer[PEBS_BUFFER_SIZE];
 };
 
+static inline u64 intel_pmu_pebs_mask(u64 cntr_mask)
+{
+	return MAX_PEBS_EVENTS_MASK & cntr_mask;
+}
+
 #endif
-- 
2.35.1
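
For context, a minimal userspace sketch of the new mask handling
(illustrative only; GENMASK_ULL() and hweight64() are replaced with plain
C stand-ins so it builds outside the kernel, and the helper mirrors the
one added to intel_ds.h):

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel's GENMASK_ULL() and hweight64(). */
#define GENMASK_ULL(h, l) \
	((~0ULL << (l)) & (~0ULL >> (63 - (h))))

static inline int hweight64(uint64_t w)
{
	return __builtin_popcountll(w);
}

#define MAX_PEBS_EVENTS		32
#define MAX_PEBS_EVENTS_MASK	GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)

/* Same shape as the helper added by the patch. */
static inline uint64_t intel_pmu_pebs_mask(uint64_t cntr_mask)
{
	return MAX_PEBS_EVENTS_MASK & cntr_mask;
}

int main(void)
{
	int num_counters = 4;	/* e.g. a PMU with 4 GP counters */
	uint64_t pebs_events_mask =
		intel_pmu_pebs_mask(GENMASK_ULL(num_counters - 1, 0));

	/* For contiguous counters this prints mask = 0xf, count = 4. */
	printf("mask = %#llx, count = %d\n",
	       (unsigned long long)pebs_events_mask,
	       hweight64(pebs_events_mask));
	return 0;
}
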
Re: [RESEND PATCH 01/12] perf/x86/intel: Support the PEBS event mask
Posted by Peter Zijlstra 1 year, 5 months ago
On Tue, Jun 18, 2024 at 08:10:33AM -0700, kan.liang@linux.intel.com wrote:
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index e010bfed8417..a0104c82baed 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c

> @@ -2157,6 +2157,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>  	void *base, *at, *top;
>  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
>  	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
> +	int max_pebs_events = hweight64(x86_pmu.pebs_events_mask);

Consider pebs_events_mask = 0xf0, then max_pebs_events becomes 4.

>  	int bit, i, size;
>  	u64 mask;
>  
> @@ -2168,8 +2169,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>  
>  	ds->pebs_index = ds->pebs_buffer_base;
>  
> -	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
> -	size = x86_pmu.max_pebs_events;
> +	mask = x86_pmu.pebs_events_mask;
> +	size = max_pebs_events;

This is wrong.. you have 8 bits to iterate, of which only the top 4 are
set.

>  	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
>  		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
>  		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
> @@ -2208,8 +2209,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>  			pebs_status = p->status = cpuc->pebs_enabled;
>  
>  		bit = find_first_bit((unsigned long *)&pebs_status,
> -					x86_pmu.max_pebs_events);
> -		if (bit >= x86_pmu.max_pebs_events)
> +					max_pebs_events);
> +		if (bit >= max_pebs_events)
>  			continue;

But the bit check here then truncates the thing to the lower 4 bits
which are all 0.

Should this not be something like:

		if (!(pebs_event_mask & (1 << bit)))
			continue;

?
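
To make the pitfall concrete, a small illustrative sketch (userspace only;
find_first_bit() is approximated with a simple loop, and the mask/status
values are hypothetical) of the popcount-bounded scan versus testing the
mask bit directly:

#include <stdint.h>
#include <stdio.h>

/* Crude stand-in for the kernel's find_first_bit(), bounded at 'size' bits. */
static int find_first_bit(uint64_t word, int size)
{
	for (int bit = 0; bit < size; bit++)
		if (word & (1ULL << bit))
			return bit;
	return size;	/* "not found" is reported as 'size' */
}

int main(void)
{
	uint64_t pebs_events_mask = 0xf0;	/* hypothetical: only counters 4-7 do PEBS */
	uint64_t pebs_status = 0x20;		/* the record belongs to counter 5 */
	int max_pebs_events = __builtin_popcountll(pebs_events_mask);	/* 4 */

	/* As posted: the scan is bounded by the popcount, so bit 5 is never seen. */
	int bit = find_first_bit(pebs_status, max_pebs_events);
	printf("popcount bound: bit = %d, dropped = %s\n",
	       bit, bit >= max_pebs_events ? "yes" : "no");

	/* One way to apply the suggested test: scan wide, then check the mask bit. */
	bit = find_first_bit(pebs_status, 64);
	printf("mask test:      bit = %d, dropped = %s\n",
	       bit, !(pebs_events_mask & (1ULL << bit)) ? "yes" : "no");
	return 0;
}
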
Re: [RESEND PATCH 01/12] perf/x86/intel: Support the PEBS event mask
Posted by Liang, Kan 1 year, 5 months ago

On 2024-06-20 3:02 a.m., Peter Zijlstra wrote:
> On Tue, Jun 18, 2024 at 08:10:33AM -0700, kan.liang@linux.intel.com wrote:
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index e010bfed8417..a0104c82baed 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
> 
>> @@ -2157,6 +2157,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>  	void *base, *at, *top;
>>  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
>>  	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
>> +	int max_pebs_events = hweight64(x86_pmu.pebs_events_mask);
> 
> Consider pebs_events_mask = 0xf0, then max_pebs_events becomes 4.

intel_pmu_drain_pebs_nhm() is an NHM-specific function. A
pebs_events_mask of 0xf0 is impossible there.

There are only 4 counters; the mask should always be 0xf.
> 
>>  	int bit, i, size;
>>  	u64 mask;
>>  
>> @@ -2168,8 +2169,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>  
>>  	ds->pebs_index = ds->pebs_buffer_base;
>>  
>> -	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
>> -	size = x86_pmu.max_pebs_events;
>> +	mask = x86_pmu.pebs_events_mask;
>> +	size = max_pebs_events;
> 
> This is wrong.. you have 8 bits to iterate, of which only the top 4 are
> set.
> 
>>  	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
>>  		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
>>  		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
>> @@ -2208,8 +2209,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>  			pebs_status = p->status = cpuc->pebs_enabled;
>>  
>>  		bit = find_first_bit((unsigned long *)&pebs_status,
>> -					x86_pmu.max_pebs_events);
>> -		if (bit >= x86_pmu.max_pebs_events)
>> +					max_pebs_events);
>> +		if (bit >= max_pebs_events)
>>  			continue;
> 
> But the bit check here then truncates the thing to the lower 4 bits
> which are all 0.
> 
> Should this not be something like:
> 
> 		if (!(pebs_event_mask & (1 << bit)))
> 			continue;
> 
> ?
> 

For the existing hardware, I don't think it's necessary. The counters
are contiguous.

Thanks,
Kan
Re: [RESEND PATCH 01/12] perf/x86/intel: Support the PEBS event mask
Posted by Peter Zijlstra 1 year, 5 months ago
On Thu, Jun 20, 2024 at 11:58:42AM -0400, Liang, Kan wrote:
> 
> 
> On 2024-06-20 3:02 a.m., Peter Zijlstra wrote:
> > On Tue, Jun 18, 2024 at 08:10:33AM -0700, kan.liang@linux.intel.com wrote:
> >> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> >> index e010bfed8417..a0104c82baed 100644
> >> --- a/arch/x86/events/intel/ds.c
> >> +++ b/arch/x86/events/intel/ds.c
> > 
> >> @@ -2157,6 +2157,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
> >>  	void *base, *at, *top;
> >>  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
> >>  	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
> >> +	int max_pebs_events = hweight64(x86_pmu.pebs_events_mask);
> > 
> > Consider pebs_events_mask = 0xf0, then max_pebs_events becomes 4.
> 
> intel_pmu_drain_pebs_nhm() is an NHM-specific function. A
> pebs_events_mask of 0xf0 is impossible there.
> 
> There are only 4 counters; the mask should always be 0xf.
> > 
> >>  	int bit, i, size;
> >>  	u64 mask;
> >>  
> >> @@ -2168,8 +2169,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
> >>  
> >>  	ds->pebs_index = ds->pebs_buffer_base;
> >>  
> >> -	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
> >> -	size = x86_pmu.max_pebs_events;
> >> +	mask = x86_pmu.pebs_events_mask;
> >> +	size = max_pebs_events;
> > 
> > This is wrong.. you have 8 bits to iterate, of which only the top 4 are
> > set.
> > 
> >>  	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
> >>  		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
> >>  		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
> >> @@ -2208,8 +2209,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
> >>  			pebs_status = p->status = cpuc->pebs_enabled;
> >>  
> >>  		bit = find_first_bit((unsigned long *)&pebs_status,
> >> -					x86_pmu.max_pebs_events);
> >> -		if (bit >= x86_pmu.max_pebs_events)
> >> +					max_pebs_events);
> >> +		if (bit >= max_pebs_events)
> >>  			continue;
> > 
> > But the bit check here then truncates the thing to the lower 4 bits
> > which are all 0.
> > 
> > Should this not be something like:
> > 
> > 		if (!(pebs_event_mask & (1 << bit)))
> > 			continue;
> > 
> > ?
> > 
> 
> For the existing hardware, I don't think it's necessary. The counters
> are contiguous.

Sure, but it still makes no sense. Fundamentally the operation is wrong;
it works by accident in this one special case. The moment someone reuses
(or copy/pastes) the code and it goes outside the special case, it goes
to hell.

Also, anybody who actually understands bitops will have a giant WTF
when they read this -- which is what happened here.
Re: [RESEND PATCH 01/12] perf/x86/intel: Support the PEBS event mask
Posted by Liang, Kan 1 year, 5 months ago

On 2024-06-20 11:58 a.m., Liang, Kan wrote:
> 
> 
> On 2024-06-20 3:02 a.m., Peter Zijlstra wrote:
>> On Tue, Jun 18, 2024 at 08:10:33AM -0700, kan.liang@linux.intel.com wrote:
>>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>>> index e010bfed8417..a0104c82baed 100644
>>> --- a/arch/x86/events/intel/ds.c
>>> +++ b/arch/x86/events/intel/ds.c
>>
>>> @@ -2157,6 +2157,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>>  	void *base, *at, *top;
>>>  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
>>>  	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
>>> +	int max_pebs_events = hweight64(x86_pmu.pebs_events_mask);
>>
>> Consider pebs_events_mask = 0xf0, then max_pebs_events becomes 4.
> 
> intel_pmu_drain_pebs_nhm() is an NHM-specific function. A
> pebs_events_mask of 0xf0 is impossible there.
> 
> There are only 4 counters; the mask should always be 0xf.
>>
>>>  	int bit, i, size;
>>>  	u64 mask;
>>>  
>>> @@ -2168,8 +2169,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>>  
>>>  	ds->pebs_index = ds->pebs_buffer_base;
>>>  
>>> -	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
>>> -	size = x86_pmu.max_pebs_events;
>>> +	mask = x86_pmu.pebs_events_mask;
>>> +	size = max_pebs_events;
>>
>> This is wrong.. you have 8 bits to iterate, of which only the top 4 are
>> set.
>>
>>>  	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
>>>  		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
>>>  		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
>>> @@ -2208,8 +2209,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
>>>  			pebs_status = p->status = cpuc->pebs_enabled;
>>>  
>>>  		bit = find_first_bit((unsigned long *)&pebs_status,
>>> -					x86_pmu.max_pebs_events);
>>> -		if (bit >= x86_pmu.max_pebs_events)
>>> +					max_pebs_events);
>>> +		if (bit >= max_pebs_events)
>>>  			continue;
>>
>> But the bit check here then truncates the thing to the lower 4 bits
>> which are all 0.
>>
>> Should this not be something like:
>>
>> 		if (!(pebs_event_mask & (1 << bit)))
>> 			continue;
>>
>> ?
>>
> 
> For the existing hardware, I don't think it's necessary. The counters
> are contiguous.
> 

On second thought, although either version works, we should try to make
the code as generic as possible.

I will introduce a generic x86_pmu_max_num_pebs() that checks the highest
set bit rather than using hweight64(). It can be used by both NHM and
future platforms.


Thanks,
Kan
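
A rough sketch of what such a helper could look like (the name comes from
the email above; the in-kernel version presumably takes the PMU pointer and
goes through the hybrid() accessor and fls64(), here reduced to a plain mask
argument so it builds standalone):

#include <stdint.h>

/* Userspace stand-in for the kernel's fls64(): 1-based index of the MSB, 0 if none. */
static inline int fls64(uint64_t x)
{
	return x ? 64 - __builtin_clzll(x) : 0;
}

/*
 * Bound PEBS scans by the highest PEBS-capable counter index + 1 rather
 * than by the number of set bits, so a sparse mask such as 0xf0 yields a
 * bound of 8 that still covers bits 4-7.
 */
static inline int x86_pmu_max_num_pebs(uint64_t pebs_events_mask)
{
	return fls64(pebs_events_mask);
}

For a contiguous mask like 0xf this returns 4, matching the old
max_pebs_events; for the hypothetical 0xf0 it returns 8, so the scan range
covers every PEBS-capable counter.
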
Re: [RESEND PATCH 01/12] perf/x86/intel: Support the PEBS event mask
Posted by Peter Zijlstra 1 year, 5 months ago
On Fri, Jun 21, 2024 at 10:19:34AM -0400, Liang, Kan wrote:
> On second thought, although either version works, we should try to make
> the code as generic as possible.

That! Thanks!