[PATCH bpf-next v3 4/7] bpf,x86: add tracing session supporting for x86_64

Posted by Menglong Dong 3 months, 2 weeks ago
Add BPF_TRACE_SESSION support to x86_64. invoke_bpf_session_entry() and
invoke_bpf_session_exit() are introduced for this purpose.

In invoke_bpf_session_entry(), we check whether the return value of the
fentry is 0, and set the corresponding session flag if it is not. In
invoke_bpf_session_exit(), we check whether that flag is set; if it is,
the fexit is skipped.

As designed, the session flags and the session cookie address are stored
after the return value, and the stack looks like this:

  cookie ptr	-> 8 bytes
  session flags	-> 8 bytes
  return value	-> 8 bytes
  argN		-> 8 bytes
  ...
  arg1		-> 8 bytes
  nr_args	-> 8 bytes
  ...
  cookieN	-> 8 bytes
  cookie1	-> 8 bytes

On session entry, we clear the return value, so the fentry always gets 0
from ctx[nr_args] or bpf_get_func_ret().

Before each BPF prog is executed, the "cookie ptr" slot is filled with the
address of that prog's cookie; this is done in both
invoke_bpf_session_entry() and invoke_bpf_session_exit().
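
To make the control flow concrete, here is a small stand-alone C model of
the flag logic described above (not kernel code: cookie-pointer handling
is omitted and all names are illustrative):

#include <stdio.h>
#include <stdint.h>

#define NR_LINKS 2

static uint64_t session_flags;  /* [rbp - sflags_off] */
static uint64_t retval;         /* [rbp - ret_off]    */

/* stand-in for one session BPF prog; nonzero on entry means "skip my fexit" */
static uint64_t run_session_prog(int i, int is_exit)
{
        return is_exit ? 0 : (i == 1);
}

static void session_entry(int i)
{
        retval = run_session_prog(i, 0);          /* fentry invocation */
        if (retval != 0) {
                session_flags |= 1ULL << (i + 1); /* remember to skip fexit i */
                retval = 0;                       /* ctx[nr_args] stays 0 */
        }
}

static void session_exit(int i)
{
        if (!(session_flags & (1ULL << (i + 1))))
                run_session_prog(i, 1);           /* fexit invocation */
        else
                printf("fexit %d skipped\n", i);
}

int main(void)
{
        int i;

        for (i = 0; i < NR_LINKS; i++)
                session_entry(i);
        /* ... the traced function runs here ... */
        session_flags |= 1;  /* bpf_trace_is_exit, set once on the exit path */
        for (i = 0; i < NR_LINKS; i++)
                session_exit(i);
        return 0;
}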

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Co-developed-by: Leon Hwang <leon.hwang@linux.dev>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
v2:
- add session cookie support
- add the session stuff after return value, instead of before nr_args
---
 arch/x86/net/bpf_jit_comp.c | 185 +++++++++++++++++++++++++++++++++++-
 1 file changed, 181 insertions(+), 4 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7a604ee9713f..2fffc530c88c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3109,6 +3109,148 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+static int invoke_bpf_session_entry(const struct btf_func_model *m, u8 **pprog,
+				    struct bpf_tramp_links *tl, int stack_size,
+				    int run_ctx_off, int ret_off, int sflags_off,
+				    int cookies_off, void *image, void *rw_image)
+{
+	int i, j = 0, cur_cookie_off;
+	u64 session_flags;
+	u8 *prog = *pprog;
+	u8 *jmp_insn;
+
+	/* clear the session flags:
+	 *   xor rax, rax
+	 *   mov QWORD PTR [rbp - sflags_off], rax
+	 */
+	EMIT3(0x48, 0x31, 0xC0);
+	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -sflags_off);
+	/*
+	 * clear the return value to make sure bpf_get_func_ret() always
+	 * gets 0 in fentry:
+	 *   mov QWORD PTR [rbp - ret_off], rax
+	 */
+	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ret_off);
+	/* clear all the cookies in the cookie array */
+	for (i = 0; i < tl->nr_links; i++) {
+		if (tl->links[i]->link.prog->call_session_cookie) {
+			cur_cookie_off = -cookies_off + j * 8;
+			/* mov QWORD PTR [rbp + cur_cookie_off], rax */
+			emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
+				 cur_cookie_off);
+			j++;
+		}
+	}
+
+	j = 0;
+	for (i = 0; i < tl->nr_links; i++) {
+		if (tl->links[i]->link.prog->call_session_cookie) {
+			cur_cookie_off = -cookies_off + j * 8;
+			/*
+			 * save the cookie address to rbp - sflags_off + 8:
+			 *   lea rax, [rbp + cur_cookie_off]
+			 *   mov QWORD PTR [rbp - sflags_off + 8], rax
+			 */
+			if (!is_imm8(cur_cookie_off))
+				EMIT3_off32(0x48, 0x8D, 0x85, cur_cookie_off);
+			else
+				EMIT4(0x48, 0x8D, 0x45, cur_cookie_off);
+			emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -sflags_off + 8);
+			j++;
+		}
+		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
+				    ret_off, image, rw_image))
+			return -EINVAL;
+
+		/* fentry prog stored its return value into [rbp - ret_off]. Emit:
+		 * if (*(u64 *)(rbp - ret_off) != 0) {
+		 *	*(u64 *)(rbp - sflags_off) |= (1ULL << (i + 1));
+		 *	*(u64 *)(rbp - ret_off) = 0;
+		 * }
+		 */
+		/* cmp QWORD PTR [rbp - ret_off], 0x0 */
+		EMIT4(0x48, 0x83, 0x7d, -ret_off); EMIT1(0x00);
+		/* emit 2 nops that will be replaced with JE insn */
+		jmp_insn = prog;
+		emit_nops(&prog, 2);
+
+		session_flags = (1ULL << (i + 1));
+		/* mov rax, $session_flags */
+		emit_mov_imm64(&prog, BPF_REG_0, session_flags >> 32, (u32) session_flags);
+		/* or QWORD PTR [rbp - sflags_off], rax */
+		EMIT2(0x48, 0x09);
+		emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -sflags_off);
+
+		/* mov QWORD PTR [rbp - ret_off], 0x0 */
+		EMIT4(0x48, 0xC7, 0x45, -ret_off); EMIT4(0x00, 0x00, 0x00, 0x00);
+
+		jmp_insn[0] = X86_JE;
+		jmp_insn[1] = prog - jmp_insn - 2;
+	}
+
+	*pprog = prog;
+	return 0;
+}
+
+static int invoke_bpf_session_exit(const struct btf_func_model *m, u8 **pprog,
+				   struct bpf_tramp_links *tl, int stack_size,
+				   int run_ctx_off, int ret_off, int sflags_off,
+				   int cookies_off, void *image, void *rw_image)
+{
+	int i, j = 0, cur_cookie_off;
+	u64 session_flags;
+	u8 *prog = *pprog;
+	u8 *jmp_insn;
+
+	/*
+	 * set the bpf_trace_is_exit flag in the session flags:
+	 *   mov rax, 1
+	 *   or QWORD PTR [rbp - sflags_off], rax
+	 */
+	emit_mov_imm32(&prog, false, BPF_REG_0, 1);
+	EMIT2(0x48, 0x09);
+	emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -sflags_off);
+
+	for (i = 0; i < tl->nr_links; i++) {
+		if (tl->links[i]->link.prog->call_session_cookie) {
+			cur_cookie_off = -cookies_off + j * 8;
+			/*
+			 * save the cookie address to rbp - sflags_off + 8:
+			 *   lea rax, [rbp + cur_cookie_off]
+			 *   mov QWORD PTR [rbp - sflags_off + 8], rax
+			 */
+			if (!is_imm8(cur_cookie_off))
+				EMIT3_off32(0x48, 0x8D, 0x85, cur_cookie_off);
+			else
+				EMIT4(0x48, 0x8D, 0x45, cur_cookie_off);
+			emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -sflags_off + 8);
+			j++;
+		}
+		/* check if (1ULL << (i + 1)) is set in the session flags, and
+		 * skip the execution of the fexit program if it is.
+		 */
+		session_flags = 1ULL << (i + 1);
+		/* mov rax, $session_flags */
+		emit_mov_imm64(&prog, BPF_REG_0, session_flags >> 32, (u32) session_flags);
+		/* test QWORD PTR [rbp - sflags_off], rax */
+		EMIT2(0x48, 0x85);
+		emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -sflags_off);
+		/* emit 2 nops that will be replaced with JE insn */
+		jmp_insn = prog;
+		emit_nops(&prog, 2);
+
+		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, false,
+				    ret_off, image, rw_image))
+			return -EINVAL;
+
+		jmp_insn[0] = X86_JNE;
+		jmp_insn[1] = prog - jmp_insn - 2;
+	}
+
+	*pprog = prog;
+	return 0;
+}
+
 /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
 #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)	\
 	__LOAD_TCC_PTR(-round_up(stack, 8) - 8)
@@ -3181,8 +3323,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 {
 	int i, ret, nr_regs = m->nr_args, stack_size = 0;
 	int ret_off, regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off,
-	    rbx_off;
+	    rbx_off, sflags_off = 0, cookies_off;
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+	struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
 	void *orig_call = func_addr;
@@ -3215,6 +3358,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	 * RBP + 8         [ return address  ]
 	 * RBP + 0         [ RBP             ]
 	 *
+	 *                  [ cookie ptr ] tracing session
+	 * RBP - sflags_off [ session flags ] tracing session
+	 *
 	 * RBP - ret_off   [ return value    ]  BPF_TRAMP_F_CALL_ORIG or
 	 *                                      BPF_TRAMP_F_RET_FENTRY_RET flags
 	 *
@@ -3230,6 +3376,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	 *
 	 * RBP - run_ctx_off [ bpf_tramp_run_ctx ]
 	 *
+	 *                   [ session cookieN ]
+	 *                   [ ... ]
+	 * RBP - cookies_off [ session cookie1 ] tracing session
+	 *
 	 *                     [ stack_argN ]  BPF_TRAMP_F_CALL_ORIG
 	 *                     [ ...        ]
 	 *                     [ stack_arg2 ]
@@ -3237,6 +3387,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	 * RSP                 [ tail_call_cnt_ptr ] BPF_TRAMP_F_TAIL_CALL_CTX
 	 */
 
+	/* room for session flags and cookie ptr */
+	if (session->nr_links) {
+		stack_size += 8 + 8;
+		sflags_off = stack_size;
+	}
+
 	/* room for return value of orig_call or fentry prog */
 	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
 	if (save_ret)
@@ -3261,6 +3417,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
 	run_ctx_off = stack_size;
 
+	if (session->nr_links) {
+		for (i = 0; i < session->nr_links; i++) {
+			if (session->links[i]->link.prog->call_session_cookie)
+				stack_size += 8;
+		}
+	}
+	cookies_off = stack_size;
+
 	if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) {
 		/* the space that used to pass arguments on-stack */
 		stack_size += (nr_regs - get_nr_used_regs(m)) * 8;
@@ -3349,6 +3513,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 			return -EINVAL;
 	}
 
+	if (session->nr_links) {
+		if (invoke_bpf_session_entry(m, &prog, session, regs_off,
+					     run_ctx_off, ret_off, sflags_off,
+					     cookies_off, image, rw_image))
+			return -EINVAL;
+	}
+
 	if (fmod_ret->nr_links) {
 		branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
 				   GFP_KERNEL);
@@ -3414,6 +3585,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		}
 	}
 
+	if (session->nr_links) {
+		if (invoke_bpf_session_exit(m, &prog, session, regs_off,
+					    run_ctx_off, ret_off, sflags_off,
+					    cookies_off, image, rw_image)) {
+			ret = -EINVAL;
+			goto cleanup;
+		}
+	}
+
 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
 		restore_regs(m, &prog, regs_off);
 
@@ -3483,9 +3663,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	int ret;
 	u32 size = image_end - image;
 
-	if (tlinks[BPF_TRAMP_SESSION].nr_links)
-		return -EOPNOTSUPP;
-
 	/* rw_image doesn't need to be in module memory range, so we can
 	 * use kvmalloc.
 	 */
-- 
2.51.1
Re: [PATCH bpf-next v3 4/7] bpf,x86: add tracing session supporting for x86_64
Posted by Alexei Starovoitov 3 months, 1 week ago
On Sat, Oct 25, 2025 at 8:02 PM Menglong Dong <menglong8.dong@gmail.com> wrote:
>
> [...]
>
> +       if (session->nr_links) {
> +               for (i = 0; i < session->nr_links; i++) {
> +                       if (session->links[i]->link.prog->call_session_cookie)
> +                               stack_size += 8;
> +               }
> +       }
> +       cookies_off = stack_size;

This is not great. It's all root and such,
but if somebody attaches 64 progs that use session cookies
then the trampoline will consume 64*8 of stack space just for
these cookies. Plus more for args, the cookie ptr, session flags, etc.
Sigh.
I understand that cookie from one session shouldn't interfere
with another, but it's all getting quite complex
especially when everything is in assembly.
And this is just x86 JIT. Other JITs would need to copy
this complex logic :(
At this point I'm not sure that "symmetry with kprobe_multi_session"
is justified as a reason to add all that.
We don't have a kprobe_session for individual kprobes after all.

I think we better spend the energy designing a mechanism to
connect existing fentry prog with fexit prog without hacking
it through a stack in the bpf trampoline.

Sorry.

pw-bot: cr
Re: [PATCH bpf-next v3 4/7] bpf,x86: add tracing session supporting for x86_64
Posted by Menglong Dong 3 months, 1 week ago
On Fri, Oct 31, 2025 at 9:42 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Sat, Oct 25, 2025 at 8:02 PM Menglong Dong <menglong8.dong@gmail.com> wrote:
> >
> > [...]
>
> > +       if (session->nr_links) {
> > +               for (i = 0; i < session->nr_links; i++) {
> > +                       if (session->links[i]->link.prog->call_session_cookie)
> > +                               stack_size += 8;
> > +               }
> > +       }
> > +       cookies_off = stack_size;
>
> This is not great. It's all root and such,
> but if somebody attaches 64 progs that use session cookies
> then the trampoline will consume 64*8 of stack space just for
> these cookies. Plus more for args, the cookie ptr, session flags, etc.

The session cookie stuff does take a lot of stack memory.
For fprobe, the cookie is stored in its shadow stack, which
keeps it off the regular stack.

How about we remove the session cookie stuff? Then
only 8 bytes (session flags) are used on the stack. And if we reuse
the nr_regs slot, no stack will be consumed. However, it will make
things complex, which I don't think we should do.

> Sigh.
> I understand that cookie from one session shouldn't interfere
> with another, but it's all getting quite complex
> especially when everything is in assembly.
> And this is just x86 JIT. Other JITs would need to copy
> this complex logic :(

Without the session cookie, it will be much easier to implement
in other arches. And with the help of AI (such as Cursor), it can
be done easily ;)

> At this point I'm not sure that "symmetry with kprobe_multi_session"
> is justified as a reason to add all that.
> We don't have a kprobe_session for individual kprobes after all.

As for my case, the tracing session can make my code much
simpler, as I always use the fentry+fexit to hook a function. And
the fexit skipping according to the return value of fentry can also
achieve better performance.

AFAICT, the main usage of the session cookie in kprobe is passing the
function arguments to the exit. For tracing, we can get the args
in the fexit. So the session cookie in tracing is not as important as
in kprobe.
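
For reference, here is a minimal sketch of how the session cookie is used
with kprobe.session today, passing data from the entry to the exit of the
same invocation (the traced function and the stored value are illustrative
only, and the kfunc declarations are assumed to match the selftests):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

extern bool bpf_session_is_return(void) __ksym;
extern __u64 *bpf_session_cookie(void) __ksym;

SEC("kprobe.session/tcp_connect")
int handle(struct pt_regs *ctx)
{
        __u64 *cookie = bpf_session_cookie();

        if (!cookie)
                return 0;

        if (!bpf_session_is_return()) {
                /* entry: stash a value for the matching exit */
                *cookie = bpf_ktime_get_ns();
                return 0;
        }

        /* exit: the value stashed at entry is available here */
        bpf_printk("tcp_connect took %llu ns",
                   bpf_ktime_get_ns() - *cookie);
        return 0;
}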

What do you think?

Thanks!
Menglong Dong

> I think we better spend the energy designing a mechanism to
> connect existing fentry prog with fexit prog without hacking
> it through a stack in the bpf trampoline.
>
> Sorry.
>
> pw-bot: cr
Re: [PATCH bpf-next v3 4/7] bpf,x86: add tracing session supporting for x86_64
Posted by Alexei Starovoitov 3 months, 1 week ago
On Thu, Oct 30, 2025 at 8:36 PM Menglong Dong <menglong8.dong@gmail.com> wrote:
>
> On Fri, Oct 31, 2025 at 9:42 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Sat, Oct 25, 2025 at 8:02 PM Menglong Dong <menglong8.dong@gmail.com> wrote:
> > >
> > > [...]
> >
> > This is not great. It's all root and such,
> > but if somebody attaches 64 progs that use session cookies
> > then the trampoline will consume 64*8 of stack space just for
> > these cookies. Plus more for args, the cookie ptr, session flags, etc.
>
> The session cookie stuff does take a lot of stack memory.
> For fprobe, the cookie is stored in its shadow stack, which
> keeps it off the regular stack.
>
> How about we remove the session cookie stuff? Then
> only 8 bytes (session flags) are used on the stack. And if we reuse
> the nr_regs slot, no stack will be consumed. However, it will make
> things complex, which I don't think we should do.
>
> > Sigh.
> > I understand that cookie from one session shouldn't interfere
> > with another, but it's all getting quite complex
> > especially when everything is in assembly.
> > And this is just x86 JIT. Other JITs would need to copy
> > this complex logic :(
>
> Without the session cookie, it will be much easier to implement
> in other arches. And with the help of AI (such as Cursor), it can
> be done easily ;)

The reality is the opposite. We see plenty of AI generated garbage.
Please stay human.

>
> > At this point I'm not sure that "symmetry with kprobe_multi_session"
> > is justified as a reason to add all that.
> > We don't have a kprobe_session for individual kprobes after all.
>
> As for my case, the tracing session can make my code much
> simpler, as I always use the fentry+fexit to hook a function. And
> the fexit skipping according to the return value of fentry can also
> achieve better performance.

I don't buy the argument that 'if (cond) goto skip_fexit_prog'
in the generated trampoline is measurably faster than
'if (cond) return' inside the fexit program.

> AFAICT, the main usage of the session cookie in kprobe is passing the
> function arguments to the exit. For tracing, we can get the args
> in the fexit. So the session cookie in tracing is not as important as
> in kprobe.

Since kprobe_multi was introduced, retsnoop and tetragon adopted
it to do mass attach, and both use bpf_get_attach_cookie(),
while neither uses bpf_session_cookie().
Searching things around I also didn't find a single real user
of bpf_session_cookie() other than selftests/bpf and Jiri's slides :)

So, doing all this work in the trampoline for bpf_session_cookie()
doesn't seem warranted, and given that, doing session in the trampoline
doesn't look useful either, since the only benefit vs a pair
of fentry/fexit progs is skipping the fexit, which can be done already.
Plus complexity in all JITs... so, I say, let's shelve the whole thing for now.
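
To illustrate the existing alternative mentioned above, here is a minimal
sketch (not part of this series) where the fentry prog records a per-thread
skip decision in a hash map and the fexit prog returns early; the map, the
traced function and the filter are illustrative only:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 1024);
        __type(key, __u32);
        __type(value, __u8);
} skip_exit SEC(".maps");

SEC("fentry/tcp_connect")
int BPF_PROG(on_entry, struct sock *sk)
{
        __u32 tid = (__u32)bpf_get_current_pid_tgid();
        __u8 one = 1;

        /* some filter; skip the fexit work for non-IPv4 sockets */
        if (sk->__sk_common.skc_family != 2 /* AF_INET */)
                bpf_map_update_elem(&skip_exit, &tid, &one, BPF_ANY);
        return 0;
}

SEC("fexit/tcp_connect")
int BPF_PROG(on_exit, struct sock *sk, int ret)
{
        __u32 tid = (__u32)bpf_get_current_pid_tgid();

        if (bpf_map_lookup_elem(&skip_exit, &tid)) {
                bpf_map_delete_elem(&skip_exit, &tid);
                return 0;       /* the 'if (cond) return' */
        }
        /* ... real fexit work ... */
        return 0;
}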
Re: [PATCH bpf-next v3 4/7] bpf,x86: add tracing session supporting for x86_64
Posted by Leon Hwang 3 months, 1 week ago

On 1/11/25 01:57, Alexei Starovoitov wrote:
> On Thu, Oct 30, 2025 at 8:36 PM Menglong Dong <menglong8.dong@gmail.com> wrote:
>>
>> On Fri, Oct 31, 2025 at 9:42 AM Alexei Starovoitov
>> <alexei.starovoitov@gmail.com> wrote:
>>>
>>> On Sat, Oct 25, 2025 at 8:02 PM Menglong Dong <menglong8.dong@gmail.com> wrote:

[...]

>>
>> Without the session cookie, it will be much easier to implement
>> in other arches. And with the help of AI (such as Cursor), it can
>> be done easily ;)
>
> The reality is the opposite. We see plenty of AI generated garbage.
> Please stay human.
>
>>
>>> At this point I'm not sure that "symmetry with kprobe_multi_session"
>>> is justified as a reason to add all that.
>>> We don't have a kprobe_session for individual kprobes after all.
>>
>> As for my case, the tracing session can make my code much
>> simpler, as I always use the fentry+fexit to hook a function. And
>> the fexit skipping according to the return value of fentry can also
>> achieve better performance.
>
> I don't buy the argument that 'if (cond) goto skip_fexit_prog'
> in the generated trampoline is measurably faster than
> 'if (cond) return' inside the fexit program.
>
>> AFAICT, the main usage of the session cookie in kprobe is passing the
>> function arguments to the exit. For tracing, we can get the args
>> in the fexit. So the session cookie in tracing is not as important as
>> in kprobe.
>
> Since kprobe_multi was introduced, retsnoop and tetragon adopted
> it to do mass attach, and both use bpf_get_attach_cookie(),
> while neither uses bpf_session_cookie().
> Searching things around I also didn't find a single real user
> of bpf_session_cookie() other than selftests/bpf and Jiri's slides :)
>
> So, doing all this work in the trampoline for bpf_session_cookie()
> doesn't seem warranted, and given that, doing session in the trampoline
> doesn't look useful either, since the only benefit vs a pair
> of fentry/fexit progs is skipping the fexit, which can be done already.
> Plus complexity in all JITs... so, I say, let's shelve the whole thing for now.
>

As for bpfsnoop[1], the new attach type is indeed helpful.

For example, when tracing hundreds of kernel functions with both fentry
and fexit programs on a 48-core bare-metal server, the following results
were observed:

bpfsnoop -k 'tcp_connect' --output-fgraph --limit-events 1 -D
2025/11/03 13:32:11 Tracing 586 tracees costs 10.190874525s
2025/11/03 13:32:14 bpfsnoop is exiting..
2025/11/03 13:32:45 Untracing 586 tracees costs 30.667462289s

With the new attach type, about half of the time can be saved.

For bpfsnoop, the return-value control could help avoid redundant
filtering in fexit programs, though it's not strictly necessary.

For instance, when tracing udp_rcv with both packet and argument filters:

bpfsnoop -k '(b)udp_rcv' --filter-pkt 'host 1.1.1.1 and udp src port 53'
--filter-arg 'skb->dev->ifindex == 6' -v
2025/11/03 13:52:55 Tracing(fentry) kernel function udp_rcv
2025/11/03 13:52:55 Tracing(fexit) kernel function udp_rcv

With return-value control, the filtering in fexit could be skipped.

Links:
[1] https://github.com/bpfsnoop/bpfsnoop

Thanks,
Leon