[v9] arch-PEBS enabling for Intel platforms

[Patch v9 02/12] perf/x86: Fix NULL event access and potential PEBS record loss

Posted by Dapeng Mi 3 months, 1 week ago

When intel_pmu_drain_pebs_icl() is called to drain PEBS records, the
perf_event_overflow() could be called to process the last PEBS record.

While perf_event_overflow() could trigger the interrupt throttle and
stop all events of the group, like what the below call-chain shows.

perf_event_overflow()
  -> __perf_event_overflow()
    ->__perf_event_account_interrupt()
      -> perf_event_throttle_group()
        -> perf_event_throttle()
          -> event->pmu->stop()
            -> x86_pmu_stop()

The side effect of stopping the events is that all corresponding event
pointers in cpuc->events[] array are cleared to NULL.

Assume there are two PEBS events (event a and event b) in a group. When
intel_pmu_drain_pebs_icl() calls perf_event_overflow() to process the
last PEBS record of PEBS event a, interrupt throttle is triggered and
all pointers of event a and event b are cleared to NULL. Then
intel_pmu_drain_pebs_icl() tries to process the last PEBS record of
event b and encounters NULL pointer access.

To avoid this issue, move cpuc->events[] clearing from x86_pmu_stop()
to x86_pmu_del(). It's safe since cpuc->active_mask or
cpuc->pebs_enabled is always checked before access the event pointer
from cpuc->events[].

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202507042103.a15d2923-lkp@intel.com
Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group")
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 arch/x86/events/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 745caa6c15a3..74479f9d6eed 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1344,6 +1344,7 @@ static void x86_pmu_enable(struct pmu *pmu)
 				hwc->state |= PERF_HES_ARCH;
 
 			x86_pmu_stop(event, PERF_EF_UPDATE);
+			cpuc->events[hwc->idx] = NULL;
 		}
 
 		/*
@@ -1365,6 +1366,7 @@ static void x86_pmu_enable(struct pmu *pmu)
 			 * if cpuc->enabled = 0, then no wrmsr as
 			 * per x86_pmu_enable_event()
 			 */
+			cpuc->events[hwc->idx] = event;
 			x86_pmu_start(event, PERF_EF_RELOAD);
 		}
 		cpuc->n_added = 0;
@@ -1531,7 +1533,6 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 
 	event->hw.state = 0;
 
-	cpuc->events[idx] = event;
 	__set_bit(idx, cpuc->active_mask);
 	static_call(x86_pmu_enable)(event);
 	perf_event_update_userpage(event);
@@ -1610,7 +1611,6 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 	if (test_bit(hwc->idx, cpuc->active_mask)) {
 		static_call(x86_pmu_disable)(event);
 		__clear_bit(hwc->idx, cpuc->active_mask);
-		cpuc->events[hwc->idx] = NULL;
 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 		hwc->state |= PERF_HES_STOPPED;
 	}
@@ -1648,6 +1648,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	 * Not a TXN, therefore cleanup properly.
 	 */
 	x86_pmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[event->hw.idx] = NULL;
 
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event_list[i])
-- 
2.34.1

Re: [Patch v9 02/12] perf/x86: Fix NULL event access and potential PEBS record loss

Posted by Peter Zijlstra 3 months ago

George, it just occurred to me that the below might also fix the root
cause of your 866cf36bfee4 ("perf/x86/amd: Check event before enable to avoid GPF")
and thus we can revert that again.

Specifically, this moves the clearing of cpuc->events[] out to
x86_pmu_del() time.

On Wed, Oct 29, 2025 at 06:21:26PM +0800, Dapeng Mi wrote:
> When intel_pmu_drain_pebs_icl() is called to drain PEBS records, the
> perf_event_overflow() could be called to process the last PEBS record.
> 
> While perf_event_overflow() could trigger the interrupt throttle and
> stop all events of the group, like what the below call-chain shows.
> 
> perf_event_overflow()
>   -> __perf_event_overflow()
>     ->__perf_event_account_interrupt()
>       -> perf_event_throttle_group()
>         -> perf_event_throttle()
>           -> event->pmu->stop()
>             -> x86_pmu_stop()
> 
> The side effect of stopping the events is that all corresponding event
> pointers in cpuc->events[] array are cleared to NULL.
> 
> Assume there are two PEBS events (event a and event b) in a group. When
> intel_pmu_drain_pebs_icl() calls perf_event_overflow() to process the
> last PEBS record of PEBS event a, interrupt throttle is triggered and
> all pointers of event a and event b are cleared to NULL. Then
> intel_pmu_drain_pebs_icl() tries to process the last PEBS record of
> event b and encounters NULL pointer access.
> 
> To avoid this issue, move cpuc->events[] clearing from x86_pmu_stop()
> to x86_pmu_del(). It's safe since cpuc->active_mask or
> cpuc->pebs_enabled is always checked before access the event pointer
> from cpuc->events[].
> 
> Reported-by: kernel test robot <oliver.sang@intel.com>
> Closes: https://lore.kernel.org/oe-lkp/202507042103.a15d2923-lkp@intel.com
> Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group")
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> ---
>  arch/x86/events/core.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 745caa6c15a3..74479f9d6eed 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -1344,6 +1344,7 @@ static void x86_pmu_enable(struct pmu *pmu)
>  				hwc->state |= PERF_HES_ARCH;
>  
>  			x86_pmu_stop(event, PERF_EF_UPDATE);
> +			cpuc->events[hwc->idx] = NULL;
>  		}
>  
>  		/*
> @@ -1365,6 +1366,7 @@ static void x86_pmu_enable(struct pmu *pmu)
>  			 * if cpuc->enabled = 0, then no wrmsr as
>  			 * per x86_pmu_enable_event()
>  			 */
> +			cpuc->events[hwc->idx] = event;
>  			x86_pmu_start(event, PERF_EF_RELOAD);
>  		}
>  		cpuc->n_added = 0;
> @@ -1531,7 +1533,6 @@ static void x86_pmu_start(struct perf_event *event, int flags)
>  
>  	event->hw.state = 0;
>  
> -	cpuc->events[idx] = event;
>  	__set_bit(idx, cpuc->active_mask);
>  	static_call(x86_pmu_enable)(event);
>  	perf_event_update_userpage(event);
> @@ -1610,7 +1611,6 @@ void x86_pmu_stop(struct perf_event *event, int flags)
>  	if (test_bit(hwc->idx, cpuc->active_mask)) {
>  		static_call(x86_pmu_disable)(event);
>  		__clear_bit(hwc->idx, cpuc->active_mask);
> -		cpuc->events[hwc->idx] = NULL;
>  		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
>  		hwc->state |= PERF_HES_STOPPED;
>  	}
> @@ -1648,6 +1648,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
>  	 * Not a TXN, therefore cleanup properly.
>  	 */
>  	x86_pmu_stop(event, PERF_EF_UPDATE);
> +	cpuc->events[event->hw.idx] = NULL;
>  
>  	for (i = 0; i < cpuc->n_events; i++) {
>  		if (event == cpuc->event_list[i])
> -- 
> 2.34.1
>

[tip: perf/core] perf/x86: Fix NULL event access and potential PEBS record loss

Posted by tip-bot2 for Dapeng Mi 2 months, 4 weeks ago

The following commit has been merged into the perf/core branch of tip:

Commit-ID:     7e772a93eb61cb6265bdd1c5bde17d0f2718b452
Gitweb:        https://git.kernel.org/tip/7e772a93eb61cb6265bdd1c5bde17d0f2718b452
Author:        Dapeng Mi <dapeng1.mi@linux.intel.com>
AuthorDate:    Wed, 29 Oct 2025 18:21:26 +08:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 07 Nov 2025 15:08:19 +01:00

perf/x86: Fix NULL event access and potential PEBS record loss

When intel_pmu_drain_pebs_icl() is called to drain PEBS records, the
perf_event_overflow() could be called to process the last PEBS record.

While perf_event_overflow() could trigger the interrupt throttle and
stop all events of the group, like what the below call-chain shows.

perf_event_overflow()
  -> __perf_event_overflow()
    ->__perf_event_account_interrupt()
      -> perf_event_throttle_group()
        -> perf_event_throttle()
          -> event->pmu->stop()
            -> x86_pmu_stop()

The side effect of stopping the events is that all corresponding event
pointers in cpuc->events[] array are cleared to NULL.

Assume there are two PEBS events (event a and event b) in a group. When
intel_pmu_drain_pebs_icl() calls perf_event_overflow() to process the
last PEBS record of PEBS event a, interrupt throttle is triggered and
all pointers of event a and event b are cleared to NULL. Then
intel_pmu_drain_pebs_icl() tries to process the last PEBS record of
event b and encounters NULL pointer access.

To avoid this issue, move cpuc->events[] clearing from x86_pmu_stop()
to x86_pmu_del(). It's safe since cpuc->active_mask or
cpuc->pebs_enabled is always checked before access the event pointer
from cpuc->events[].

Closes: https://lore.kernel.org/oe-lkp/202507042103.a15d2923-lkp@intel.com
Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group")
Reported-by: kernel test robot <oliver.sang@intel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-3-dapeng1.mi@linux.intel.com
---
 arch/x86/events/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 0cf68ad..b2868fe 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1344,6 +1344,7 @@ static void x86_pmu_enable(struct pmu *pmu)
 				hwc->state |= PERF_HES_ARCH;
 
 			x86_pmu_stop(event, PERF_EF_UPDATE);
+			cpuc->events[hwc->idx] = NULL;
 		}
 
 		/*
@@ -1365,6 +1366,7 @@ static void x86_pmu_enable(struct pmu *pmu)
 			 * if cpuc->enabled = 0, then no wrmsr as
 			 * per x86_pmu_enable_event()
 			 */
+			cpuc->events[hwc->idx] = event;
 			x86_pmu_start(event, PERF_EF_RELOAD);
 		}
 		cpuc->n_added = 0;
@@ -1531,7 +1533,6 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 
 	event->hw.state = 0;
 
-	cpuc->events[idx] = event;
 	__set_bit(idx, cpuc->active_mask);
 	static_call(x86_pmu_enable)(event);
 	perf_event_update_userpage(event);
@@ -1610,7 +1611,6 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 	if (test_bit(hwc->idx, cpuc->active_mask)) {
 		static_call(x86_pmu_disable)(event);
 		__clear_bit(hwc->idx, cpuc->active_mask);
-		cpuc->events[hwc->idx] = NULL;
 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 		hwc->state |= PERF_HES_STOPPED;
 	}
@@ -1648,6 +1648,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	 * Not a TXN, therefore cleanup properly.
 	 */
 	x86_pmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[event->hw.idx] = NULL;
 
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event_list[i])