[PATCH] perf/lock: Fix non-atomic max_time and min_time updates in contention_data

Suchit Karunakaran posted 1 patch 2 months ago
There is a newer version of this series
.../perf/util/bpf_skel/lock_contention.bpf.c  | 50 +++++++++++++++++--
1 file changed, 45 insertions(+), 5 deletions(-)
[PATCH] perf/lock: Fix non-atomic max_time and min_time updates in contention_data
Posted by Suchit Karunakaran 2 months ago
update_contention_data() had a FIXME noting that the max_time and
min_time updates were not atomic. Two CPUs could simultaneously
read the same old value, both pass the comparison check, and race on
the write-back. For max_time the smaller duration could then overwrite
the larger one (and vice versa for min_time), silently corrupting the
statistics.

Fix this by replacing the bare conditional assignments with a
bpf_loop()-based compare-and-swap (CAS) retry loop, capped at 64
iterations. Each field tracks its own convergence independently via
the max_done/min_done flags in struct cas_ctx, so a field that has
already been settled is not retried even if the other field needs
more attempts.

Signed-off-by: Suchit Karunakaran <suchitkarunakaran@gmail.com>
---
 .../perf/util/bpf_skel/lock_contention.bpf.c  | 50 +++++++++++++++++--
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 96e7d853b9ed..5c8431be674a 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -175,6 +175,13 @@ struct mm_struct___new {
 	struct rw_semaphore mmap_lock;
 } __attribute__((preserve_access_index));
 
+struct cas_ctx {
+	struct contention_data *data;
+	u64 duration;
+	int max_done;
+	int min_done;
+};
+
 extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;
 
 /* control flags */
@@ -486,16 +493,49 @@ static inline s32 get_owner_stack_id(u64 *stacktrace)
 	return -1;
 }
 
+static long cas_min_max_cb(u64 idx, void *arg)
+{
+	struct cas_ctx *ctx = arg;
+
+	if (!ctx->max_done) {
+		u64 old_max = ctx->data->max_time;
+
+		if (old_max >= ctx->duration) {
+			ctx->max_done = 1;
+		} else {
+			u64 r = __sync_val_compare_and_swap(
+				&ctx->data->max_time, old_max, ctx->duration);
+			if (r == old_max)
+				ctx->max_done = 1;
+		}
+	}
+
+	if (!ctx->min_done) {
+		u64 old_min = ctx->data->min_time;
+
+		if (old_min <= ctx->duration) {
+			ctx->min_done = 1;
+		} else {
+			u64 r = __sync_val_compare_and_swap(
+				&ctx->data->min_time, old_min, ctx->duration);
+			if (r == old_min)
+				ctx->min_done = 1;
+		}
+	}
+
+	return (ctx->max_done && ctx->min_done) ? 1 : 0;
+}
+
 static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count)
 {
 	__sync_fetch_and_add(&data->total_time, duration);
 	__sync_fetch_and_add(&data->count, count);
 
-	/* FIXME: need atomic operations */
-	if (data->max_time < duration)
-		data->max_time = duration;
-	if (data->min_time > duration)
-		data->min_time = duration;
+	struct cas_ctx ctx = {
+		.data     = data,
+		.duration = duration,
+		.max_done = 0,
+		.min_done = 0,
+	};
+	bpf_loop(64, cas_min_max_cb, &ctx, 0);
 }
 
 static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
-- 
2.53.0
Re: [PATCH] perf/lock: Fix non-atomic max_time and min_time updates in contention_data
Posted by Namhyung Kim 1 month, 4 weeks ago
Hello,

On Sun, Apr 19, 2026 at 11:57:54PM +0530, Suchit Karunakaran wrote:
> The update_contention_data() had a FIXME noting that max_time and
> min_time updates lacked atomicity. Two CPUs could simultaneously
> read a stale value, pass the comparison check and race on the
> write-back, with the smaller value potentially overwriting the
> larger one and silently corrupting the statistics.
> 
> Fix this by replacing the bare conditional assignments with a
> bpf_loop()-based CAS retry loop. Each field tracks its own
> convergence independently via max_done/min_done flags in cas_ctx,
> so a successful CAS on one field is never retried even if the
> other field needs more attempts.

Interesting!

It looks like bpf_loop() was added in v5.17, more than 4 years ago,
so I think it's OK to use it now.

> 
> Signed-off-by: Suchit Karunakaran <suchitkarunakaran@gmail.com>

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung

> ---
>  .../perf/util/bpf_skel/lock_contention.bpf.c  | 50 +++++++++++++++++--
>  1 file changed, 45 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> index 96e7d853b9ed..5c8431be674a 100644
> --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> @@ -175,6 +175,13 @@ struct mm_struct___new {
>  	struct rw_semaphore mmap_lock;
>  } __attribute__((preserve_access_index));
>  
> +struct cas_ctx {
> +	struct contention_data *data;
> +	u64 duration;
> +	int max_done;
> +	int min_done;
> +};
> +
>  extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;
>  
>  /* control flags */
> @@ -486,16 +493,49 @@ static inline s32 get_owner_stack_id(u64 *stacktrace)
>  	return -1;
>  }
>  
> +static long cas_min_max_cb(u64 idx, void *arg)
> +{
> +	struct cas_ctx *ctx = arg;
> +
> +	if (!ctx->max_done) {
> +		u64 old_max = ctx->data->max_time;
> +
> +		if (old_max >= ctx->duration) {
> +			ctx->max_done = 1;
> +		} else {
> +			u64 r = __sync_val_compare_and_swap(
> +				&ctx->data->max_time, old_max, ctx->duration);
> +			if (r == old_max)
> +				ctx->max_done = 1;
> +		}
> +	}
> +
> +	if (!ctx->min_done) {
> +		u64 old_min = ctx->data->min_time;
> +
> +		if (old_min <= ctx->duration) {
> +			ctx->min_done = 1;
> +		} else {
> +			u64 r = __sync_val_compare_and_swap(
> +				&ctx->data->min_time, old_min, ctx->duration);
> +			if (r == old_min)
> +				ctx->min_done = 1;
> +		}
> +	}
> +
> +	return (ctx->max_done && ctx->min_done) ? 1 : 0;
> +}
> +
>  static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count)
>  {
>  	__sync_fetch_and_add(&data->total_time, duration);
>  	__sync_fetch_and_add(&data->count, count);
>  
> -	/* FIXME: need atomic operations */
> -	if (data->max_time < duration)
> -		data->max_time = duration;
> -	if (data->min_time > duration)
> -		data->min_time = duration;
> +	struct cas_ctx ctx = {
> +		.data     = data,
> +		.duration = duration,
> +		.max_done = 0,
> +		.min_done = 0,
> +	};
> +	bpf_loop(64, cas_min_max_cb, &ctx, 0);
>  }
>  
>  static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
> -- 
> 2.53.0
>