[Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists before accessing it

Posted by Dapeng Mi 1 month, 3 weeks ago
The PMI handler can disable some events due to interrupt throttling and
clear the corresponding entries in the cpuc->events[] array.

perf_event_overflow()
  -> __perf_event_overflow()
    -> __perf_event_account_interrupt()
      -> perf_event_throttle_group()
        -> perf_event_throttle()
          -> event->pmu->stop()
            -> x86_pmu_stop()
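
The clearing happens at the end of that chain: x86_pmu_stop() does
roughly the following (paraphrased sketch, not verbatim kernel code):

	if (test_bit(hwc->idx, cpuc->active_mask)) {
		static_call(x86_pmu_disable)(event);
		__clear_bit(hwc->idx, cpuc->active_mask);
		cpuc->events[hwc->idx] = NULL;	/* the pointer read later */
		hwc->state |= PERF_HES_STOPPED;
	}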

Moreover, the PMI is an NMI on x86 and can interrupt other perf code,
such as setup_pebs_adaptive_sample_data(). Once the PMI handler finishes
and execution returns into setup_pebs_adaptive_sample_data(), it may
find that cpuc->events[*] has become NULL; dereferencing that NULL
pointer is an invalid memory access and eventually crashes the kernel.

Thus add a NULL check before accessing the cpuc->events[*] pointer.

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202507042103.a15d2923-lkp@intel.com
Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group")
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Tested-by: kernel test robot <oliver.sang@intel.com>
---
 arch/x86/events/core.c       |  3 +++
 arch/x86/events/intel/core.c |  6 +++++-
 arch/x86/events/intel/ds.c   | 13 ++++++-------
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 7610f26dfbd9..f0a3bc57157d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1711,6 +1711,9 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 			continue;
 
 		event = cpuc->events[idx];
+		if (!event)
+			continue;
+
 		last_period = event->hw.last_period;
 
 		val = static_call(x86_pmu_update)(event);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 15da60cf69f2..386717b75a09 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2718,6 +2718,8 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
 		if (!is_topdown_idx(idx))
 			continue;
 		other = cpuc->events[idx];
+		if (!other)
+			continue;
 		other->hw.saved_slots = slots;
 		other->hw.saved_metric = metrics;
 	}
@@ -2761,6 +2763,8 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end,
 		if (!is_topdown_idx(idx))
 			continue;
 		other = cpuc->events[idx];
+		if (!other)
+			continue;
 		__icl_update_topdown_event(other, slots, metrics,
 					   event ? event->hw.saved_slots : 0,
 					   event ? event->hw.saved_metric : 0);
@@ -3138,7 +3142,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
 
 	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
 		event = cpuc->events[bit];
-		if (!event->attr.precise_ip)
+		if (!event || !event->attr.precise_ip)
 			continue;
 
 		perf_sample_data_init(data, 0, event->hw.last_period);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c0b7ac1c7594..b23c49e2e06f 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2480,6 +2480,8 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64
 	 */
 	for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
 		event = cpuc->events[bit];
+		if (!event)
+			continue;
 		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
 			intel_pmu_save_and_restart_reload(event, 0);
 	}
@@ -2579,10 +2581,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 			continue;
 
 		event = cpuc->events[bit];
-		if (WARN_ON_ONCE(!event))
-			continue;
-
-		if (WARN_ON_ONCE(!event->attr.precise_ip))
+		if (!event || WARN_ON_ONCE(!event->attr.precise_ip))
 			continue;
 
 		/* log dropped samples number */
@@ -2645,9 +2644,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 		pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask;
 		for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
 			event = cpuc->events[bit];
-
-			if (WARN_ON_ONCE(!event) ||
-			    WARN_ON_ONCE(!event->attr.precise_ip))
+			if (!event || WARN_ON_ONCE(!event->attr.precise_ip))
 				continue;
 
 			if (counts[bit]++) {
@@ -2663,6 +2660,8 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
 			continue;
 
 		event = cpuc->events[bit];
+		if (!event)
+			continue;
 
 		__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
 					    counts[bit], setup_pebs_adaptive_sample_data);
-- 
2.34.1
Re: [Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists before accessing it
Posted by Peter Zijlstra 1 month, 2 weeks ago
On Mon, Aug 11, 2025 at 05:00:31PM +0800, Dapeng Mi wrote:
> The PMI handler can disable some events due to interrupt throttling and
> clear the corresponding entries in the cpuc->events[] array.
> 
> perf_event_overflow()
>   -> __perf_event_overflow()
>     -> __perf_event_account_interrupt()
>       -> perf_event_throttle_group()
>         -> perf_event_throttle()
>           -> event->pmu->stop()
>             -> x86_pmu_stop()
> 
> Moreover, the PMI is an NMI on x86 and can interrupt other perf code,
> such as setup_pebs_adaptive_sample_data().

Uhh, how? AFAICT we only do drain_pebs() from the PMI itself, or disable
the PMU first by clearing GLOBAL_CTRL.

> Once the PMI handler finishes and execution returns into
> setup_pebs_adaptive_sample_data(), it may find that cpuc->events[*] has
> become NULL; dereferencing that NULL pointer is an invalid memory
> access and eventually crashes the kernel.
> 
> Thus add a NULL check before accessing the cpuc->events[*] pointer.

This doesn't seem fully thought through.

If we do this NULL check, then the active_mask bittest is completely
superfluous and can be removed, no?

Also, what about this race:

	event = cpuc->events[idx]; // !NULL;
	<PMI>
		x86_pmu_stop()
		  cpuc->events[idx] = NULL;
	</PMI>
	... uses event

Worse, since it is a 'normal' load, it is permitted for the compiler to
re-issue the load, at which point it will still explode. IOW, it should
be READ_ONCE(), *if* we can live with the above race at all. Can we?
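
Something like this, perhaps (completely untested, just to illustrate
the pattern; READ_ONCE() being the usual <linux/compiler.h> helper):

	/*
	 * Snapshot the slot exactly once; a plain load may legally be
	 * re-issued by the compiler after an NMI cleared the slot.
	 */
	event = READ_ONCE(cpuc->events[idx]);
	if (!event)
		continue;

	/* ... 'event' can still be concurrently stopped here */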

First though, you need to explain how we get here. Because drain_pebs()
nesting would be *BAD*.

Re: [Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists before accessing it
Posted by Mi, Dapeng 1 month, 2 weeks ago
On 8/19/2025 4:45 PM, Peter Zijlstra wrote:
> On Mon, Aug 11, 2025 at 05:00:31PM +0800, Dapeng Mi wrote:
>> The PMI handler can disable some events due to interrupt throttling and
>> clear the corresponding entries in the cpuc->events[] array.
>>
>> perf_event_overflow()
>>   -> __perf_event_overflow()
>>     -> __perf_event_account_interrupt()
>>       -> perf_event_throttle_group()
>>         -> perf_event_throttle()
>>           -> event->pmu->stop()
>>             -> x86_pmu_stop()
>>
>> Moreover, the PMI is an NMI on x86 and can interrupt other perf code,
>> such as setup_pebs_adaptive_sample_data().
> Uhh, how? AFAICT we only do drain_pebs() from the PMI itself, or disable
> the PMU first by clearing GLOBAL_CTRL.
>
>> Once the PMI handler finishes and execution returns into
>> setup_pebs_adaptive_sample_data(), it may find that cpuc->events[*] has
>> become NULL; dereferencing that NULL pointer is an invalid memory
>> access and eventually crashes the kernel.
>>
>> Thus add a NULL check before accessing the cpuc->events[*] pointer.
> This doesn't seem fully thought through.
>
> If we do this NULL check, then the active_mask bittest is completely
> superfluous and can be removed, no?
>
> Also, what about this race:
>
> 	event = cpuc->events[idx]; // !NULL;
> 	<PMI>
> 		x86_pmu_stop()
> 		  cpuc->events[idx] = NULL;
> 	</PMI>
> 	... uses event
>
> Worse, since it is a 'normal' load, it is permitted for the compiler to
> re-issue the load, at which point it will still explode. IOW, it should
> be READ_ONCE(), *if* we can live with the above race at all. Can we?
>
> First though, you need to explain how we get here. Because drain_pebs()
> nesting would be *BAD*.

I suppose I made a mistake in explaining why the issue happens. Since I
can't reproduce this issue locally (I synced with Oliver; the issue seems
reproducible only on a specific SPR model), I can only guess at the root
cause. I originally thought the drain_pebs() helper could be interrupted
by a PMI and then cause the issue, but as Kan said, that's not true, as
the PMU is always disabled before the PEBS buffer is drained.

So after thinking twice, I suppose the reason should be:

    When intel_pmu_drain_pebs_icl() is called to drain PEBS records,
    perf_event_overflow() could be called to process the last PEBS record.

    perf_event_overflow() could then trigger interrupt throttling and
    stop all events of the group, as the call chain below shows.

    perf_event_overflow()
      -> __perf_event_overflow()
        -> __perf_event_account_interrupt()
          -> perf_event_throttle_group()
            -> perf_event_throttle()
              -> event->pmu->stop()
                -> x86_pmu_stop()

    The side effect of stopping the events is that all corresponding event
    pointers in cpuc->events[] array are cleared to NULL.

    Assume there are two PEBS events (event a and event b) in a group. When
    intel_pmu_drain_pebs_icl() calls perf_event_overflow() to process the
    last PEBS record of PEBS event a, interrupt throttling is triggered and
    the pointers for both event a and event b are cleared to NULL. Then
    intel_pmu_drain_pebs_icl() tries to process the last PEBS record of
    event b and hits a NULL pointer access.
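
    In code terms, the crash site would be the last-record loop of
    intel_pmu_drain_pebs_icl(), roughly (simplified sketch of the
    current code):

	for_each_set_bit(bit, (unsigned long *)&mask, size) {
		if (!counts[bit])
			continue;

		event = cpuc->events[bit];	/* NULL for event b once
						 * event a's last record
						 * throttled the group */
		__intel_pmu_pebs_last_event(event, iregs, regs, data,
					    last[bit], counts[bit],
					    setup_pebs_adaptive_sample_data);
	}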

I would cook a v3 patch to update the commit message and code. Thanks.
Re: [Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists before accessing it
Posted by Mi, Dapeng 1 month, 2 weeks ago
On 8/19/2025 5:21 PM, Mi, Dapeng wrote:
> So after thinking twice, I suppose the reason should be:
>
>     When intel_pmu_drain_pebs_icl() is called to drain PEBS records,
>     perf_event_overflow() could be called to process the last PEBS record.
>
>     perf_event_overflow() could then trigger interrupt throttling and
>     stop all events of the group, as the call chain below shows.
>
>     perf_event_overflow()
>       -> __perf_event_overflow()
>         -> __perf_event_account_interrupt()
>           -> perf_event_throttle_group()
>             -> perf_event_throttle()
>               -> event->pmu->stop()
>                 -> x86_pmu_stop()
>
>     The side effect of stopping the events is that all corresponding event
>     pointers in cpuc->events[] array are cleared to NULL.
>
>     Assume there are two PEBS events (event a and event b) in a group. When
>     intel_pmu_drain_pebs_icl() calls perf_event_overflow() to process the
>     last PEBS record of PEBS event a, interrupt throttling is triggered and
>     the pointers for both event a and event b are cleared to NULL. Then
>     intel_pmu_drain_pebs_icl() tries to process the last PEBS record of
>     event b and hits a NULL pointer access.
>
> I would cook a v3 patch to update the commit message and code. Thanks.

Forgot to say: for this scenario, I suppose we can directly skip
processing the last PEBS record if the event pointer is NULL, since the
last PEBS record was already processed when the event was stopped
previously.


Re: [Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists before accessing it
Posted by Liang, Kan 1 month, 3 weeks ago

On 2025-08-11 2:00 a.m., Dapeng Mi wrote:
> The PMI handler can disable some events due to interrupt throttling and
> clear the corresponding entries in the cpuc->events[] array.
> 
> perf_event_overflow()
>   -> __perf_event_overflow()
>     -> __perf_event_account_interrupt()
>       -> perf_event_throttle_group()
>         -> perf_event_throttle()
>           -> event->pmu->stop()
>             -> x86_pmu_stop()
> 
> Moreover, the PMI is an NMI on x86 and can interrupt other perf code,
> such as setup_pebs_adaptive_sample_data().

The PMU is disabled when draining the PEBS records. I don't think a PMI
can be triggered in setup_pebs_adaptive_sample_data().

> Once the PMI handler finishes and execution returns into
> setup_pebs_adaptive_sample_data(), it may find that cpuc->events[*] has
> become NULL; dereferencing that NULL pointer is an invalid memory
> access and eventually crashes the kernel.

Commit 9734e25fbf5a stops all events in a group when processing the
last record of the leader event. For large PEBS, it's possible that
some records for the member events are still left. That should be the
root cause of the NULL pointer. If so, we should drain those records as
well.

Thanks,
Kan
Re: [Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists before accessing it
Posted by Mi, Dapeng 1 month, 3 weeks ago
On 8/12/2025 7:32 AM, Liang, Kan wrote:
>
> On 2025-08-11 2:00 a.m., Dapeng Mi wrote:
>> The PMI handler can disable some events due to interrupt throttling and
>> clear the corresponding entries in the cpuc->events[] array.
>>
>> perf_event_overflow()
>>   -> __perf_event_overflow()
>>     -> __perf_event_account_interrupt()
>>       -> perf_event_throttle_group()
>>         -> perf_event_throttle()
>>           -> event->pmu->stop()
>>             -> x86_pmu_stop()
>>
>> Moreover, the PMI is an NMI on x86 and can interrupt other perf code,
>> such as setup_pebs_adaptive_sample_data().
> The PMU is disabled when draining the PEBS records. I don't think a PMI
> can be triggered in setup_pebs_adaptive_sample_data().

Besides the NMI handler, the drain_pebs helper intel_pmu_drain_pebs_buffer()
can be called in many places, such as on context switch and when disabling
a PEBS event. All these places could be interrupted by the NMI handler,
which could then trigger this NULL pointer access issue.


>
>> Once the PMI handler finishes and execution returns into
>> setup_pebs_adaptive_sample_data(), it may find that cpuc->events[*] has
>> become NULL; dereferencing that NULL pointer is an invalid memory
>> access and eventually crashes the kernel.
> Commit 9734e25fbf5a stops all events in a group when processing the
> last record of the leader event. For large PEBS, it's possible that
> some records for the member events are still left. That should be the
> root cause of the NULL pointer. If so, we should drain those records as
> well.

The leftover PEBS records would always be drained by
intel_pmu_drain_large_pebs() when disabling a PEBS event.
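
As far as I can tell, the drain happens along this path (paraphrased
from my reading of the code; details may vary across kernel versions):

	intel_pmu_pebs_disable()
	  -> intel_pmu_drain_large_pebs()
	    -> intel_pmu_drain_pebs_buffer()	/* flushes leftover records */

	/* sketch: drain only when every PEBS event uses large PEBS */
	static void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
	{
		if (cpuc->n_pebs == cpuc->n_large_pebs &&
		    cpuc->n_pebs != cpuc->n_pebs_via_pt)
			intel_pmu_drain_pebs_buffer();
	}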


Re: [Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists before accessing it
Posted by Liang, Kan 1 month, 3 weeks ago

On 2025-08-11 7:33 p.m., Mi, Dapeng wrote:
> 
> On 8/12/2025 7:32 AM, Liang, Kan wrote:
>>
>> On 2025-08-11 2:00 a.m., Dapeng Mi wrote:
>>> The PMI handler can disable some events due to interrupt throttling and
>>> clear the corresponding entries in the cpuc->events[] array.
>>>
>>> perf_event_overflow()
>>>   -> __perf_event_overflow()
>>>     -> __perf_event_account_interrupt()
>>>       -> perf_event_throttle_group()
>>>         -> perf_event_throttle()
>>>           -> event->pmu->stop()
>>>             -> x86_pmu_stop()
>>>
>>> Moreover, the PMI is an NMI on x86 and can interrupt other perf code,
>>> such as setup_pebs_adaptive_sample_data().
>> The PMU is disabled when draining the PEBS records. I don't think a PMI
>> can be triggered in setup_pebs_adaptive_sample_data().
> 
> Besides the NMI handler, the drain_pebs helper intel_pmu_drain_pebs_buffer()
> can be called in many places, such as on context switch and when disabling
> a PEBS event.

Yes

> All these places could be interrupted by the NMI handler,

No. Before draining the buffer, the PMU must be stopped. No NMI could be
triggered.

> which could then trigger this NULL pointer access issue.
> 
> 
>>
>>> Once the PMI handler finishes and execution returns into
>>> setup_pebs_adaptive_sample_data(), it may find that cpuc->events[*] has
>>> become NULL; dereferencing that NULL pointer is an invalid memory
>>> access and eventually crashes the kernel.
>> Commit 9734e25fbf5a stops all events in a group when processing the
>> last record of the leader event. For large PEBS, it's possible that
>> some records for the member events are still left. That should be the
>> root cause of the NULL pointer. If so, we should drain those records as
>> well.
> 
> The leftover PEBS records would always be drained by
> intel_pmu_drain_large_pebs() when disabling a PEBS event.

When stopping the event.

Then you should only need the check in intel_pmu_drain_pebs_icl(), since
the stop only happens when handling the last event.

Thanks,
Kan
Re: [Patch v2 3/6] perf/x86: Check if cpuc->events[*] pointer exists before accessing it
Posted by Mi, Dapeng 1 month, 2 weeks ago
On 8/13/2025 2:16 AM, Liang, Kan wrote:
>> The leftover PEBS records would always be drained by
>> intel_pmu_drain_large_pebs() when disabling a PEBS event.
> When stopping the event.
>
> Then you should only need the check in intel_pmu_drain_pebs_icl(), since
> the stop only happens when handling the last event.

Yes, it's enough to only add the NULL check in intel_pmu_drain_pebs_icl(). Thanks.
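
So v3 would keep only something like this in the last-record loop of
intel_pmu_drain_pebs_icl() (sketch of the reduced change; the actual v3
diff may differ):

 		event = cpuc->events[bit];
+		/*
+		 * The whole group may have been throttled and stopped
+		 * while processing an earlier counter's last record; its
+		 * last record was already flushed by x86_pmu_stop(), so
+		 * just skip the cleared slot.
+		 */
+		if (!event)
+			continue;
 
 		__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
 					    counts[bit], setup_pebs_adaptive_sample_data);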

