[PATCH] perf/x86: Fix out of range data

Namhyung Kim posted 1 patch 2 years ago
There is a newer version of this series
arch/x86/events/core.c | 1 +
1 file changed, 1 insertion(+)
[PATCH] perf/x86: Fix out of range data
Posted by Namhyung Kim 2 years ago
On x86 each cpu_hw_events maintains a table for counter assignment but
it missed to update one for the deleted event in x86_pmu_del().  This
can make perf_clear_dirty_counters() reset used counter if it's called
before event scheduling or enabling.  Then it would return out of range
data which doesn't make sense.

The following code can reproduce the problem.

  $ cat repro.c
  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
  #include <linux/perf_event.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <sys/syscall.h>

  struct perf_event_attr attr = {
  	.type = PERF_TYPE_HARDWARE,
  	.config = PERF_COUNT_HW_CPU_CYCLES,
  	.disabled = 1,
  };

  void *worker(void *arg)
  {
  	int cpu = (long)arg;
  	int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
  	int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
  	void *p;

  	do {
  		ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
  		p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
  		ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);

  		ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
  		munmap(p, 4096);
  		ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
  	} while (1);

  	return NULL;
  }

  int main(void)
  {
  	int i;
  	int n = sysconf(_SC_NPROCESSORS_ONLN);
  	pthread_t *th = calloc(n, sizeof(*th));

  	for (i = 0; i < n; i++)
  		pthread_create(&th[i], NULL, worker, (void *)(long)i);
  	for (i = 0; i < n; i++)
  		pthread_join(th[i], NULL);

  	free(th);
  	return 0;
  }

And you can see the out of range data using perf stat like this.
Probably it'd be easier to see on a large machine.

  $ gcc -o repro repro.c -pthread
  $ ./repro &
  $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
       1.001028462 CPU6   196,719,295,683,763      cycles                           # 194290.996 GHz                       (71.54%)
       1.001028462 CPU3   396,077,485,787,730      branch-misses                    # 15804359784.80% of all branches      (71.07%)
       1.001028462 CPU17  197,608,350,727,877      branch-misses                    # 14594186554.56% of all branches      (71.22%)
       2.020064073 CPU4   198,372,472,612,140      cycles                           # 194681.113 GHz                       (70.95%)
       2.020064073 CPU6   199,419,277,896,696      cycles                           # 195720.007 GHz                       (70.57%)
       2.020064073 CPU20  198,147,174,025,639      cycles                           # 194474.654 GHz                       (71.03%)
       2.020064073 CPU20  198,421,240,580,145      stalled-cycles-frontend          #  100.14% frontend cycles idle        (70.93%)
       3.037443155 CPU4   197,382,689,923,416      cycles                           # 194043.065 GHz                       (71.30%)
       3.037443155 CPU20  196,324,797,879,414      cycles                           # 193003.773 GHz                       (71.69%)
       3.037443155 CPU5   197,679,956,608,205      stalled-cycles-backend           # 1315606428.66% backend cycles idle   (71.19%)
       3.037443155 CPU5   198,571,860,474,851      instructions                     # 13215422.58  insn per cycle

It should move the contents in the cpuc->assign as well.

Fixes: 5471eea5d3bf ("perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task")
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 arch/x86/events/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 09050641ce5d..5b0dd07b1ef1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	while (++i < cpuc->n_events) {
 		cpuc->event_list[i-1] = cpuc->event_list[i];
 		cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+		cpuc->assign[i-1] = cpuc->assign[i];
 	}
 	cpuc->event_constraint[i-1] = NULL;
 	--cpuc->n_events;
-- 
2.43.0.472.g3155946c3a-goog
Re: [PATCH] perf/x86: Fix out of range data
Posted by Liang, Kan 2 years ago

On 2023-12-16 2:28 a.m., Namhyung Kim wrote:
> On x86 each cpu_hw_events maintains a table for counter assignment but
> it missed to update one for the deleted event in x86_pmu_del().  This
> can make perf_clear_dirty_counters() reset used counter if it's called
> before event scheduling or enabling.  Then it would return out of range
> data which doesn't make sense.
> 
> The following code can reproduce the problem.
> 
>   $ cat repro.c
>   #include <pthread.h>
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <unistd.h>
>   #include <linux/perf_event.h>
>   #include <sys/ioctl.h>
>   #include <sys/mman.h>
>   #include <sys/syscall.h>
> 
>   struct perf_event_attr attr = {
>   	.type = PERF_TYPE_HARDWARE,
>   	.config = PERF_COUNT_HW_CPU_CYCLES,
>   	.disabled = 1,
>   };
> 
>   void *worker(void *arg)
>   {
>   	int cpu = (long)arg;
>   	int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
>   	int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
>   	void *p;
> 
>   	do {
>   		ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
>   		p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
>   		ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
> 
>   		ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
>   		munmap(p, 4096);
>   		ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
>   	} while (1);
> 
>   	return NULL;
>   }
> 
>   int main(void)
>   {
>   	int i;
>   	int n = sysconf(_SC_NPROCESSORS_ONLN);
>   	pthread_t *th = calloc(n, sizeof(*th));
> 
>   	for (i = 0; i < n; i++)
>   		pthread_create(&th[i], NULL, worker, (void *)(long)i);
>   	for (i = 0; i < n; i++)
>   		pthread_join(th[i], NULL);
> 
>   	free(th);
>   	return 0;
>   }
> 
> And you can see the out of range data using perf stat like this.
> Probably it'd be easier to see on a large machine.
> 
>   $ gcc -o repro repro.c -pthread
>   $ ./repro &
>   $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
>        1.001028462 CPU6   196,719,295,683,763      cycles                           # 194290.996 GHz                       (71.54%)
>        1.001028462 CPU3   396,077,485,787,730      branch-misses                    # 15804359784.80% of all branches      (71.07%)
>        1.001028462 CPU17  197,608,350,727,877      branch-misses                    # 14594186554.56% of all branches      (71.22%)
>        2.020064073 CPU4   198,372,472,612,140      cycles                           # 194681.113 GHz                       (70.95%)
>        2.020064073 CPU6   199,419,277,896,696      cycles                           # 195720.007 GHz                       (70.57%)
>        2.020064073 CPU20  198,147,174,025,639      cycles                           # 194474.654 GHz                       (71.03%)
>        2.020064073 CPU20  198,421,240,580,145      stalled-cycles-frontend          #  100.14% frontend cycles idle        (70.93%)
>        3.037443155 CPU4   197,382,689,923,416      cycles                           # 194043.065 GHz                       (71.30%)
>        3.037443155 CPU20  196,324,797,879,414      cycles                           # 193003.773 GHz                       (71.69%)
>        3.037443155 CPU5   197,679,956,608,205      stalled-cycles-backend           # 1315606428.66% backend cycles idle   (71.19%)
>        3.037443155 CPU5   198,571,860,474,851      instructions                     # 13215422.58  insn per cycle
> 
> It should move the contents in the cpuc->assign as well.

Yes, the patch looks good to me.

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>

Thanks,
Kan
> 
> Fixes: 5471eea5d3bf ("perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task")
> Cc: Kan Liang <kan.liang@linux.intel.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  arch/x86/events/core.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 09050641ce5d..5b0dd07b1ef1 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
>  	while (++i < cpuc->n_events) {
>  		cpuc->event_list[i-1] = cpuc->event_list[i];
>  		cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
> +		cpuc->assign[i-1] = cpuc->assign[i];
>  	}
>  	cpuc->event_constraint[i-1] = NULL;
>  	--cpuc->n_events;
Re: [PATCH] perf/x86: Fix out of range data
Posted by Namhyung Kim 1 year, 11 months ago
Hello,

On Sat, Dec 16, 2023 at 4:42 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
>
>
>
> On 2023-12-16 2:28 a.m., Namhyung Kim wrote:
> > On x86 each cpu_hw_events maintains a table for counter assignment but
> > it missed to update one for the deleted event in x86_pmu_del().  This
> > can make perf_clear_dirty_counters() reset used counter if it's called
> > before event scheduling or enabling.  Then it would return out of range
> > data which doesn't make sense.
> >
> > The following code can reproduce the problem.
> >
> >   $ cat repro.c
> >   #include <pthread.h>
> >   #include <stdio.h>
> >   #include <stdlib.h>
> >   #include <unistd.h>
> >   #include <linux/perf_event.h>
> >   #include <sys/ioctl.h>
> >   #include <sys/mman.h>
> >   #include <sys/syscall.h>
> >
> >   struct perf_event_attr attr = {
> >       .type = PERF_TYPE_HARDWARE,
> >       .config = PERF_COUNT_HW_CPU_CYCLES,
> >       .disabled = 1,
> >   };
> >
> >   void *worker(void *arg)
> >   {
> >       int cpu = (long)arg;
> >       int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> >       int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> >       void *p;
> >
> >       do {
> >               ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
> >               p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
> >               ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
> >
> >               ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
> >               munmap(p, 4096);
> >               ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
> >       } while (1);
> >
> >       return NULL;
> >   }
> >
> >   int main(void)
> >   {
> >       int i;
> >       int n = sysconf(_SC_NPROCESSORS_ONLN);
> >       pthread_t *th = calloc(n, sizeof(*th));
> >
> >       for (i = 0; i < n; i++)
> >               pthread_create(&th[i], NULL, worker, (void *)(long)i);
> >       for (i = 0; i < n; i++)
> >               pthread_join(th[i], NULL);
> >
> >       free(th);
> >       return 0;
> >   }
> >
> > And you can see the out of range data using perf stat like this.
> > Probably it'd be easier to see on a large machine.
> >
> >   $ gcc -o repro repro.c -pthread
> >   $ ./repro &
> >   $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
> >        1.001028462 CPU6   196,719,295,683,763      cycles                           # 194290.996 GHz                       (71.54%)
> >        1.001028462 CPU3   396,077,485,787,730      branch-misses                    # 15804359784.80% of all branches      (71.07%)
> >        1.001028462 CPU17  197,608,350,727,877      branch-misses                    # 14594186554.56% of all branches      (71.22%)
> >        2.020064073 CPU4   198,372,472,612,140      cycles                           # 194681.113 GHz                       (70.95%)
> >        2.020064073 CPU6   199,419,277,896,696      cycles                           # 195720.007 GHz                       (70.57%)
> >        2.020064073 CPU20  198,147,174,025,639      cycles                           # 194474.654 GHz                       (71.03%)
> >        2.020064073 CPU20  198,421,240,580,145      stalled-cycles-frontend          #  100.14% frontend cycles idle        (70.93%)
> >        3.037443155 CPU4   197,382,689,923,416      cycles                           # 194043.065 GHz                       (71.30%)
> >        3.037443155 CPU20  196,324,797,879,414      cycles                           # 193003.773 GHz                       (71.69%)
> >        3.037443155 CPU5   197,679,956,608,205      stalled-cycles-backend           # 1315606428.66% backend cycles idle   (71.19%)
> >        3.037443155 CPU5   198,571,860,474,851      instructions                     # 13215422.58  insn per cycle
> >
> > It should move the contents in the cpuc->assign as well.
>
> Yes, the patch looks good to me.
>
> Reviewed-by: Kan Liang <kan.liang@linux.intel.com>

Thanks for your review, Kan.

Ingo, Peter, can you please pick this up?

Thanks,
Namhyung
Re: [PATCH] perf/x86: Fix out of range data
Posted by Namhyung Kim 1 year, 10 months ago
Ping!

On Tue, Jan 9, 2024 at 1:28 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Hello,
>
> On Sat, Dec 16, 2023 at 4:42 AM Liang, Kan <kan.liang@linux.intel.com> wrote:
> >
> >
> >
> > On 2023-12-16 2:28 a.m., Namhyung Kim wrote:
> > > On x86 each cpu_hw_events maintains a table for counter assignment but
> > > it missed to update one for the deleted event in x86_pmu_del().  This
> > > can make perf_clear_dirty_counters() reset used counter if it's called
> > > before event scheduling or enabling.  Then it would return out of range
> > > data which doesn't make sense.
> > >
> > > The following code can reproduce the problem.
> > >
> > >   $ cat repro.c
> > >   #include <pthread.h>
> > >   #include <stdio.h>
> > >   #include <stdlib.h>
> > >   #include <unistd.h>
> > >   #include <linux/perf_event.h>
> > >   #include <sys/ioctl.h>
> > >   #include <sys/mman.h>
> > >   #include <sys/syscall.h>
> > >
> > >   struct perf_event_attr attr = {
> > >       .type = PERF_TYPE_HARDWARE,
> > >       .config = PERF_COUNT_HW_CPU_CYCLES,
> > >       .disabled = 1,
> > >   };
> > >
> > >   void *worker(void *arg)
> > >   {
> > >       int cpu = (long)arg;
> > >       int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> > >       int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
> > >       void *p;
> > >
> > >       do {
> > >               ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
> > >               p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
> > >               ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
> > >
> > >               ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
> > >               munmap(p, 4096);
> > >               ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
> > >       } while (1);
> > >
> > >       return NULL;
> > >   }
> > >
> > >   int main(void)
> > >   {
> > >       int i;
> > >       int n = sysconf(_SC_NPROCESSORS_ONLN);
> > >       pthread_t *th = calloc(n, sizeof(*th));
> > >
> > >       for (i = 0; i < n; i++)
> > >               pthread_create(&th[i], NULL, worker, (void *)(long)i);
> > >       for (i = 0; i < n; i++)
> > >               pthread_join(th[i], NULL);
> > >
> > >       free(th);
> > >       return 0;
> > >   }
> > >
> > > And you can see the out of range data using perf stat like this.
> > > Probably it'd be easier to see on a large machine.
> > >
> > >   $ gcc -o repro repro.c -pthread
> > >   $ ./repro &
> > >   $ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
> > >        1.001028462 CPU6   196,719,295,683,763      cycles                           # 194290.996 GHz                       (71.54%)
> > >        1.001028462 CPU3   396,077,485,787,730      branch-misses                    # 15804359784.80% of all branches      (71.07%)
> > >        1.001028462 CPU17  197,608,350,727,877      branch-misses                    # 14594186554.56% of all branches      (71.22%)
> > >        2.020064073 CPU4   198,372,472,612,140      cycles                           # 194681.113 GHz                       (70.95%)
> > >        2.020064073 CPU6   199,419,277,896,696      cycles                           # 195720.007 GHz                       (70.57%)
> > >        2.020064073 CPU20  198,147,174,025,639      cycles                           # 194474.654 GHz                       (71.03%)
> > >        2.020064073 CPU20  198,421,240,580,145      stalled-cycles-frontend          #  100.14% frontend cycles idle        (70.93%)
> > >        3.037443155 CPU4   197,382,689,923,416      cycles                           # 194043.065 GHz                       (71.30%)
> > >        3.037443155 CPU20  196,324,797,879,414      cycles                           # 193003.773 GHz                       (71.69%)
> > >        3.037443155 CPU5   197,679,956,608,205      stalled-cycles-backend           # 1315606428.66% backend cycles idle   (71.19%)
> > >        3.037443155 CPU5   198,571,860,474,851      instructions                     # 13215422.58  insn per cycle
> > >
> > > It should move the contents in the cpuc->assign as well.
> >
> > Yes, the patch looks good to me.
> >
> > Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
>
> Thanks for your review, Kan.
>
> Ingo, Peter, can you please pick this up?
>
> Thanks,
> Namhyung