[PATCH v2] perf/core: Fix cgroup events tracking
Posted by Chengming Zhou 2 years, 9 months ago
We encounter perf warnings when using cgroup events like:
```
cd /sys/fs/cgroup
mkdir test
perf stat -e cycles -a -G test
```

Which then triggers:

WARNING: CPU: 0 PID: 690 at kernel/events/core.c:849 perf_cgroup_switch+0xb2/0xc0
Call Trace:
 <TASK>
 __schedule+0x4ae/0x9f0
 ? _raw_spin_unlock_irqrestore+0x23/0x40
 ? __cond_resched+0x18/0x20
 preempt_schedule_common+0x2d/0x70
 __cond_resched+0x18/0x20
 wait_for_completion+0x2f/0x160
 ? cpu_stop_queue_work+0x9e/0x130
 affine_move_task+0x18a/0x4f0

WARNING: CPU: 0 PID: 690 at kernel/events/core.c:829 ctx_sched_in+0x1cf/0x1e0
Call Trace:
 <TASK>
 ? ctx_sched_out+0xb7/0x1b0
 perf_cgroup_switch+0x88/0xc0
 __schedule+0x4ae/0x9f0
 ? _raw_spin_unlock_irqrestore+0x23/0x40
 ? __cond_resched+0x18/0x20
 preempt_schedule_common+0x2d/0x70
 __cond_resched+0x18/0x20
 wait_for_completion+0x2f/0x160
 ? cpu_stop_queue_work+0x9e/0x130
 affine_move_task+0x18a/0x4f0

The two warnings above are not complete; I removed other unimportant
information. The problem is caused by how perf cgroup events are
tracked:

CPU0					CPU1
perf_event_open()
  perf_event_alloc()
    account_event()
      account_event_cpu()
        atomic_inc(perf_cgroup_events)
					__perf_event_task_sched_out()
					  if (atomic_read(perf_cgroup_events))
					    perf_cgroup_switch()
					      // kernel/events/core.c:849
					      WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0)
					      if (READ_ONCE(cpuctx->cgrp) == cgrp) // false
					        return
					      perf_ctx_lock()
					      ctx_sched_out()
					      cpuctx->cgrp = cgrp
					      ctx_sched_in()
					        perf_cgroup_set_timestamp()
					          // kernel/events/core.c:829
					          WARN_ON_ONCE(!ctx->nr_cgroups)
					      perf_ctx_unlock()
  perf_install_in_context()
    cpu_function_call()
					__perf_install_in_context()
					  add_event_to_ctx()
					    list_add_event()
					      perf_cgroup_event_enable()
					        ctx->nr_cgroups++
					        cpuctx->cgrp = X

We can see from the above that we wrongly use the percpu atomic
perf_cgroup_events to decide whether we need to perf_cgroup_switch():
the counter is incremented at allocation time, before the event is
installed, so it can already be non-zero on a CPU where no cgroup event
has been enabled yet. perf_cgroup_switch() should only run once we know
this CPU has cgroup events enabled.
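
Concretely, here is the difference between the two gates (a simplified
side-by-side sketch; the real hunks are in the diff below):

```
/* Old gate: perf_cgroup_events is incremented in account_event_cpu(),
 * i.e. once a cgroup event has been *allocated* for this CPU, which
 * happens before perf_install_in_context() enables anything here:
 */
if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
	perf_cgroup_switch(next);

/* New gate: cpuctx->cgrp only becomes non-NULL from
 * perf_cgroup_event_enable(), i.e. once a cgroup event is actually
 * installed and enabled on this CPU, which closes the window above:
 */
if (READ_ONCE(cpuctx->cgrp) == NULL)
	return;
```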

Commit bd2756811766 ("perf: Rewrite core context handling") changed
perf to have only one context per CPU, so we can simply use cpuctx->cgrp
to check whether this CPU has cgroup events enabled.

So the percpu atomic perf_cgroup_events is no longer needed.
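
For reference, the per-CPU state that the new check relies on looks
roughly like this after the context rewrite (a trimmed sketch of the
upstream structures, most members omitted):

```
struct perf_event_context {
	int			nr_cgroups;	/* cgroup events in this context */
	/* ... */
};

struct perf_cpu_context {
	struct perf_event_context	ctx;	/* the single per-CPU context */
#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp;	/* non-NULL iff a cgroup event is
						 * enabled on this CPU: set by
						 * perf_cgroup_event_enable(),
						 * cleared by perf_cgroup_event_disable()
						 */
#endif
	/* ... */
};
```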

Fixes: bd2756811766 ("perf: Rewrite core context handling")
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Tested-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
v2:
 - Remove timestamps and fix the race diagram in the commit log, per Ravi Bangoria.
 - Add Fixes tag and Tested-by tag.
---
 kernel/events/core.c | 42 ++++++++++--------------------------------
 1 file changed, 10 insertions(+), 32 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index eacc3702654d..5d97a9f26003 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -380,7 +380,6 @@ enum event_type_t {
 
 /*
  * perf_sched_events : >0 events exist
- * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
 
 static void perf_sched_delayed(struct work_struct *work);
@@ -389,7 +388,6 @@ static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
 static DEFINE_MUTEX(perf_sched_mutex);
 static atomic_t perf_sched_count;
 
-static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -844,9 +842,16 @@ static void perf_cgroup_switch(struct task_struct *task)
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
 	struct perf_cgroup *cgrp;
 
-	cgrp = perf_cgroup_from_task(task, NULL);
+	/*
+	 * cpuctx->cgrp is set when the first cgroup event enabled,
+	 * and is cleared when the last cgroup event disabled.
+	 */
+	if (READ_ONCE(cpuctx->cgrp) == NULL)
+		return;
 
 	WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
+
+	cgrp = perf_cgroup_from_task(task, NULL);
 	if (READ_ONCE(cpuctx->cgrp) == cgrp)
 		return;
 
@@ -3631,8 +3636,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
 	 * to check if we have to switch out PMU state.
 	 * cgroup event are system-wide mode only
 	 */
-	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-		perf_cgroup_switch(next);
+	perf_cgroup_switch(next);
 }
 
 static bool perf_less_group_idx(const void *l, const void *r)
@@ -4974,15 +4978,6 @@ static void unaccount_pmu_sb_event(struct perf_event *event)
 		detach_sb_event(event);
 }
 
-static void unaccount_event_cpu(struct perf_event *event, int cpu)
-{
-	if (event->parent)
-		return;
-
-	if (is_cgroup_event(event))
-		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
-}
-
 #ifdef CONFIG_NO_HZ_FULL
 static DEFINE_SPINLOCK(nr_freq_lock);
 #endif
@@ -5048,8 +5043,6 @@ static void unaccount_event(struct perf_event *event)
 			schedule_delayed_work(&perf_sched_work, HZ);
 	}
 
-	unaccount_event_cpu(event, event->cpu);
-
 	unaccount_pmu_sb_event(event);
 }
 
@@ -11679,15 +11672,6 @@ static void account_pmu_sb_event(struct perf_event *event)
 		attach_sb_event(event);
 }
 
-static void account_event_cpu(struct perf_event *event, int cpu)
-{
-	if (event->parent)
-		return;
-
-	if (is_cgroup_event(event))
-		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
-}
-
 /* Freq events need the tick to stay alive (see perf_event_task_tick). */
 static void account_freq_event_nohz(void)
 {
@@ -11775,8 +11759,6 @@ static void account_event(struct perf_event *event)
 	}
 enabled:
 
-	account_event_cpu(event, event->cpu);
-
 	account_pmu_sb_event(event);
 }
 
@@ -12822,13 +12804,11 @@ static void __perf_pmu_remove(struct perf_event_context *ctx,
 
 	perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) {
 		perf_remove_from_context(event, 0);
-		unaccount_event_cpu(event, cpu);
 		put_pmu_ctx(event->pmu_ctx);
 		list_add(&event->migrate_entry, events);
 
 		for_each_sibling_event(sibling, event) {
 			perf_remove_from_context(sibling, 0);
-			unaccount_event_cpu(sibling, cpu);
 			put_pmu_ctx(sibling->pmu_ctx);
 			list_add(&sibling->migrate_entry, events);
 		}
@@ -12847,7 +12827,6 @@ static void __perf_pmu_install_event(struct pmu *pmu,
 
 	if (event->state >= PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_INACTIVE;
-	account_event_cpu(event, cpu);
 	perf_install_in_context(ctx, event, cpu);
 }
 
@@ -13742,8 +13721,7 @@ static int __perf_cgroup_move(void *info)
 	struct task_struct *task = info;
 
 	preempt_disable();
-	if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
-		perf_cgroup_switch(task);
+	perf_cgroup_switch(task);
 	preempt_enable();
 
 	return 0;
-- 
2.37.2
Re: [PATCH v2] perf/core: Fix cgroup events tracking
Posted by Chengming Zhou 2 years, 9 months ago
Hello, ping :-)
Re: [PATCH v2] perf/core: Fix cgroup events tracking
Posted by Peter Zijlstra 2 years, 9 months ago
On Fri, Dec 16, 2022 at 07:25:25PM +0800, Chengming Zhou wrote:
> Hello, ping :-)

I just queued it for perf/urgent. Thanks
[tip: perf/urgent] perf/core: Fix cgroup events tracking
Posted by tip-bot2 for Chengming Zhou 2 years, 8 months ago
The following commit has been merged into the perf/urgent branch of tip:

Commit-ID:     f841b682baef90ee144df8b12e2c76aa460717c1
Gitweb:        https://git.kernel.org/tip/f841b682baef90ee144df8b12e2c76aa460717c1
Author:        Chengming Zhou <zhouchengming@bytedance.com>
AuthorDate:    Wed, 07 Dec 2022 20:40:23 +08:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 27 Dec 2022 12:44:00 +01:00

perf/core: Fix cgroup events tracking

Fixes: bd2756811766 ("perf: Rewrite core context handling")
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lkml.kernel.org/r/20221207124023.66252-1-zhouchengming@bytedance.com