[PATCH 03/19] perf: Fix perf_pmu_register() vs perf_init_event()

Peter Zijlstra posted 19 patches 1 year, 3 months ago
There is a newer version of this series
[PATCH 03/19] perf: Fix perf_pmu_register() vs perf_init_event()
Posted by Peter Zijlstra 1 year, 3 months ago
There is a fairly obvious race between perf_init_event() doing
idr_find() and perf_pmu_register() doing idr_alloc() with an
incompletely initialized pmu pointer.

Avoid by doing idr_alloc() on a NULL pointer to register the id, and
swizzling the real pmu pointer at the end using idr_replace().

Also making sure to not set pmu members after publishing the pmu, duh.

[ introduce idr_cmpxchg() in order to better handle the idr_replace()
  error case -- if it were to return an unexpected pointer, it will
  already have replaced the value and there is no going back. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/core.c |   28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11739,6 +11739,21 @@ static int pmu_dev_alloc(struct pmu *pmu
 static struct lock_class_key cpuctx_mutex;
 static struct lock_class_key cpuctx_lock;
 
+static bool idr_cmpxchg(struct idr *idr, unsigned long id, void *old, void *new)
+{
+	void *tmp, *val = idr_find(idr, id);
+
+	if (val != old)
+		return false;
+
+	tmp = idr_replace(idr, new, id);
+	if (IS_ERR(tmp))
+		return false;
+
+	WARN_ON_ONCE(tmp != val);
+	return true;
+}
+
 int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 {
 	int cpu, ret, max = PERF_TYPE_MAX;
@@ -11765,7 +11780,7 @@ int perf_pmu_register(struct pmu *pmu, c
 	if (type >= 0)
 		max = type;
 
-	ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
+	ret = idr_alloc(&pmu_idr, NULL, max, 0, GFP_KERNEL);
 	if (ret < 0)
 		goto free_pdc;
 
@@ -11773,6 +11788,7 @@ int perf_pmu_register(struct pmu *pmu, c
 
 	type = ret;
 	pmu->type = type;
+	atomic_set(&pmu->exclusive_cnt, 0);
 
 	if (pmu_bus_running && !pmu->dev) {
 		ret = pmu_dev_alloc(pmu);
@@ -11821,14 +11837,22 @@ int perf_pmu_register(struct pmu *pmu, c
 	if (!pmu->event_idx)
 		pmu->event_idx = perf_event_idx_default;
 
+	/*
+	 * Now that the PMU is complete, make it visible to perf_try_init_event().
+	 */
+	if (!idr_cmpxchg(&pmu_idr, pmu->type, NULL, pmu))
+		goto free_context;
 	list_add_rcu(&pmu->entry, &pmus);
-	atomic_set(&pmu->exclusive_cnt, 0);
+
 	ret = 0;
 unlock:
 	mutex_unlock(&pmus_lock);
 
 	return ret;
 
+free_context:
+	free_percpu(pmu->cpu_pmu_context);
+
 free_dev:
 	if (pmu->dev && pmu->dev != PMU_NULL_DEV) {
 		device_del(pmu->dev);
Re: [PATCH 03/19] perf: Fix perf_pmu_register() vs perf_init_event()
Posted by Uros Bizjak 1 year, 3 months ago

On 4. 11. 24 14:39, Peter Zijlstra wrote:
> There is a fairly obvious race between perf_init_event() doing
> idr_find() and perf_pmu_register() doing idr_alloc() with an
> incompletely initialized pmu pointer.
> 
> Avoid by doing idr_alloc() on a NULL pointer to register the id, and
> swizzling the real pmu pointer at the end using idr_replace().
> 
> Also making sure to not set pmu members after publishing the pmu, duh.
> 
> [ introduce idr_cmpxchg() in order to better handle the idr_replace()
>    error case -- if it were to return an unexpected pointer, it will
>    already have replaced the value and there is no going back. ]
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>   kernel/events/core.c |   28 ++++++++++++++++++++++++++--
>   1 file changed, 26 insertions(+), 2 deletions(-)
> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -11739,6 +11739,21 @@ static int pmu_dev_alloc(struct pmu *pmu
>   static struct lock_class_key cpuctx_mutex;
>   static struct lock_class_key cpuctx_lock;
>   
> +static bool idr_cmpxchg(struct idr *idr, unsigned long id, void *old, void *new)
> +{
> +	void *tmp, *val = idr_find(idr, id);
> +
> +	if (val != old)
> +		return false;
> +
> +	tmp = idr_replace(idr, new, id);
> +	if (IS_ERR(tmp))
> +		return false;
> +
> +	WARN_ON_ONCE(tmp != val);
> +	return true;
> +}

Can the above function be named idr_try_cmpxchg?

cmpxchg family of functions return an old value from the location and 
one would expect that idr_cmpxchg() returns an old value from *idr, too. 
idr_cmpxchg() function however returns success/failure status, and this 
is also what functions from try_cmpxchg family return.

Uros.
Re: [PATCH 03/19] perf: Fix perf_pmu_register() vs perf_init_event()
Posted by Peter Zijlstra 1 year, 3 months ago
On Mon, Nov 04, 2024 at 04:36:26PM +0100, Uros Bizjak wrote:
> 
> 
> On 4. 11. 24 14:39, Peter Zijlstra wrote:
> > There is a fairly obvious race between perf_init_event() doing
> > idr_find() and perf_pmu_register() doing idr_alloc() with an
> > incompletely initialized pmu pointer.
> > 
> > Avoid by doing idr_alloc() on a NULL pointer to register the id, and
> > swizzling the real pmu pointer at the end using idr_replace().
> > 
> > Also making sure to not set pmu members after publishing the pmu, duh.
> > 
> > [ introduce idr_cmpxchg() in order to better handle the idr_replace()
> >    error case -- if it were to return an unexpected pointer, it will
> >    already have replaced the value and there is no going back. ]
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >   kernel/events/core.c |   28 ++++++++++++++++++++++++++--
> >   1 file changed, 26 insertions(+), 2 deletions(-)
> > 
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -11739,6 +11739,21 @@ static int pmu_dev_alloc(struct pmu *pmu
> >   static struct lock_class_key cpuctx_mutex;
> >   static struct lock_class_key cpuctx_lock;
> > +static bool idr_cmpxchg(struct idr *idr, unsigned long id, void *old, void *new)
> > +{
> > +	void *tmp, *val = idr_find(idr, id);
> > +
> > +	if (val != old)
> > +		return false;
> > +
> > +	tmp = idr_replace(idr, new, id);
> > +	if (IS_ERR(tmp))
> > +		return false;
> > +
> > +	WARN_ON_ONCE(tmp != val);
> > +	return true;
> > +}
> 
> Can the above function be named idr_try_cmpxchg?
> 
> cmpxchg family of functions return an old value from the location and one
> would expect that idr_cmpxchg() returns an old value from *idr, too.
> idr_cmpxchg() function however returns success/failure status, and this is
> also what functions from try_cmpxchg family return.

Fair enough -- OTOH, this function is very much not atomic. I considered
calling it idr_cas() as to distance itself from cmpxchg family.

Also, it is local to perf, and not placed in idr.h or similar.

While the usage here is somewhat spurious, it gets used later on in the
series to better effect.
[tip: perf/core] perf/core: Fix perf_pmu_register() vs. perf_init_event()
Posted by tip-bot2 for Peter Zijlstra 11 months, 2 weeks ago
The following commit has been merged into the perf/core branch of tip:

Commit-ID:     003659fec9f6d8c04738cb74b5384398ae8a7e88
Gitweb:        https://git.kernel.org/tip/003659fec9f6d8c04738cb74b5384398ae8a7e88
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Mon, 04 Nov 2024 14:39:12 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Sat, 01 Mar 2025 19:38:42 +01:00

perf/core: Fix perf_pmu_register() vs. perf_init_event()

There is a fairly obvious race between perf_init_event() doing
idr_find() and perf_pmu_register() doing idr_alloc() with an
incompletely initialized PMU pointer.

Avoid by doing idr_alloc() on a NULL pointer to register the id, and
swizzling the real struct pmu pointer at the end using idr_replace().

Also making sure to not set struct pmu members after publishing
the struct pmu, duh.

[ introduce idr_cmpxchg() in order to better handle the idr_replace()
  error case -- if it were to return an unexpected pointer, it will
  already have replaced the value and there is no going back. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20241104135517.858805880@infradead.org
---
 kernel/events/core.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 11793d6..823aa08 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11830,6 +11830,21 @@ free_dev:
 static struct lock_class_key cpuctx_mutex;
 static struct lock_class_key cpuctx_lock;
 
+static bool idr_cmpxchg(struct idr *idr, unsigned long id, void *old, void *new)
+{
+	void *tmp, *val = idr_find(idr, id);
+
+	if (val != old)
+		return false;
+
+	tmp = idr_replace(idr, new, id);
+	if (IS_ERR(tmp))
+		return false;
+
+	WARN_ON_ONCE(tmp != val);
+	return true;
+}
+
 int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 {
 	int cpu, ret, max = PERF_TYPE_MAX;
@@ -11856,7 +11871,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 	if (type >= 0)
 		max = type;
 
-	ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
+	ret = idr_alloc(&pmu_idr, NULL, max, 0, GFP_KERNEL);
 	if (ret < 0)
 		goto free_pdc;
 
@@ -11864,6 +11879,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 
 	type = ret;
 	pmu->type = type;
+	atomic_set(&pmu->exclusive_cnt, 0);
 
 	if (pmu_bus_running && !pmu->dev) {
 		ret = pmu_dev_alloc(pmu);
@@ -11912,14 +11928,22 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 	if (!pmu->event_idx)
 		pmu->event_idx = perf_event_idx_default;
 
+	/*
+	 * Now that the PMU is complete, make it visible to perf_try_init_event().
+	 */
+	if (!idr_cmpxchg(&pmu_idr, pmu->type, NULL, pmu))
+		goto free_context;
 	list_add_rcu(&pmu->entry, &pmus);
-	atomic_set(&pmu->exclusive_cnt, 0);
+
 	ret = 0;
 unlock:
 	mutex_unlock(&pmus_lock);
 
 	return ret;
 
+free_context:
+	free_percpu(pmu->cpu_pmu_context);
+
 free_dev:
 	if (pmu->dev && pmu->dev != PMU_NULL_DEV) {
 		device_del(pmu->dev);