[PATCH bpf-next] bpf: Remove RCU lock for perf callchain buffer

Posted by Tao Chen 1 week, 2 days ago
As Andrii suggested, BPF can guarantee that perf's callchain buffers
won't be released while they are in use. For bpf_get_stackid, the BPF
stack map keeps them alive by delaying put_callchain_buffers() until
the map is freed; for bpf_get_stack/bpf_get_task_stack, the BPF program
itself holds the buffers alive until it is freed, which is delayed
until after an RCU Tasks Trace + RCU grace period.

The explicit rcu_read_lock()/rcu_read_unlock() around the perf
callchain access in __bpf_get_stack() is therefore unnecessary; drop it.
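
Roughly, the lifetime guarantee boils down to the following (a simplified
sketch, not the verbatim bpf-next code; error handling and the rest of the
alloc/free paths are elided):

/* Sketch: the callchain buffers are pinned for the whole lifetime of
 * the stack map (and, for bpf_get_stack/bpf_get_task_stack, of the
 * program), so a running program can never observe them being freed.
 */
static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
	...
	err = get_callchain_buffers(sysctl_perf_event_max_stack);
	...
}

static void stack_map_free(struct bpf_map *map)
{
	/* per the argument above, the free path only runs after the
	 * RCU Tasks Trace + RCU grace period, i.e. after any program
	 * that might still reference the buffers has finished
	 */
	put_callchain_buffers();
	...
}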

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Tao Chen <chen.dylane@linux.dev>
---
 kernel/bpf/stackmap.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index da3d328f5c1..2e682d8697b 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -460,9 +460,6 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 
 	max_depth = stack_map_calculate_max_depth(size, elem_size, flags);
 
-	if (may_fault)
-		rcu_read_lock(); /* need RCU for perf's callchain below */
-
 	if (trace_in) {
 		trace = trace_in;
 		trace->nr = min_t(u32, trace->nr, max_depth);
@@ -473,11 +470,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 					   crosstask, false, 0);
 	}
 
-	if (unlikely(!trace) || trace->nr < skip) {
-		if (may_fault)
-			rcu_read_unlock();
+	if (unlikely(!trace) || trace->nr < skip)
 		goto err_fault;
-	}
 
 	trace_nr = trace->nr - skip;
 	copy_len = trace_nr * elem_size;
@@ -493,10 +487,6 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 		memcpy(buf, ips, copy_len);
 	}
 
-	/* trace/ips should not be dereferenced after this point */
-	if (may_fault)
-		rcu_read_unlock();
-
 	if (user_build_id)
 		stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);
 
-- 
2.48.1
Re: [PATCH bpf-next] bpf: Remove RCU lock for perf callchain buffer
Posted by Tao Chen 1 week, 1 day ago
On 2026/1/29 00:57, Tao Chen wrote:
> [...]

Hi Andrii,

I sent this patch as a separate submission, as I'm not sure when the 
patchset will be merged. After this is merged, I will update to v9. Is that OK?

The previous patchset:
https://lore.kernel.org/bpf/CAEf4BzbubXxLHsO_2aVBG_oYE1Sc9iafLyvcX-PEjcQ7EXWCqA@mail.gmail.com/

I also saw that Qing recently reported a syzbot issue:
https://lore.kernel.org/bpf/20260128084538.1889001-1-wangqing7171@gmail.com/

-- 
Best Regards
Tao Chen
Re: [PATCH bpf-next] bpf: Remove RCU lock for perf callchain buffer
Posted by Andrii Nakryiko 3 days, 10 hours ago
On Fri, Jan 30, 2026 at 12:52 AM Tao Chen <chen.dylane@linux.dev> wrote:
>
> On 2026/1/29 00:57, Tao Chen wrote:
> > [...]
>
> Hi Andrii,
>
> I sent this patch as a separate submission, as I'm not sure when the
> patchset will be merged. After this is merged, I will update to v9. Is that OK?

See my reply to your other patch set. We should probably just replace
the RCU manipulations with preempt_disable()/preempt_enable() and be done
with all of this. (And RCU Tasks Trace implies RCU right now, and that is
going to be guaranteed moving forward; we already make this assumption in
the recent bpf_timer patch set.)
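
Concretely, that suggestion would take roughly the following shape in
__bpf_get_stack() (an untested sketch of the idea only; the exact
get_perf_callchain() arguments are elided):

	if (may_fault)
		preempt_disable();	/* instead of rcu_read_lock() */

	trace = get_perf_callchain(regs, ..., crosstask, false, 0);

	/* ... copy the trace out into buf as before ... */

	if (may_fault)
		preempt_enable();	/* instead of rcu_read_unlock() */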
