[PATCH RFC bpf-next 3/5] bpf,x86: add tracing session supporting for x86_64

Menglong Dong posted 5 patches 3 months, 3 weeks ago
There is a newer version of this series
[PATCH RFC bpf-next 3/5] bpf,x86: add tracing session supporting for x86_64
Posted by Menglong Dong 3 months, 3 weeks ago
Add BPF_TRACE_SESSION support to x86_64. invoke_bpf_session_entry and
invoke_bpf_session_exit are introduced for this purpose.

In invoke_bpf_session_entry(), we check whether the return value of the
fentry is 0, and set the corresponding flag if it is not. And in
invoke_bpf_session_exit(), we check whether the corresponding flag is
set. If it is set, the fexit will be skipped.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Co-developed-by: Leon Hwang <leon.hwang@linux.dev>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 arch/x86/net/bpf_jit_comp.c | 115 +++++++++++++++++++++++++++++++++++-
 1 file changed, 114 insertions(+), 1 deletion(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d4c93d9e73e4..0586b96ed529 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3108,6 +3108,97 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+static int invoke_bpf_session_entry(const struct btf_func_model *m, u8 **pprog,
+				    struct bpf_tramp_links *tl, int stack_size,
+				    int run_ctx_off, int session_off,
+				    void *image, void *rw_image)
+{
+	u64 session_flags;
+	u8 *prog = *pprog;
+	u8 *jmp_insn;
+	int i;
+
+	/* clear the session flags:
+	 *
+	 *   xor rax, rax
+	 *   mov QWORD PTR [rbp - session_off], rax
+	 */
+	EMIT3(0x48, 0x31, 0xC0);
+	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -session_off);
+
+	for (i = 0; i < tl->nr_links; i++) {
+		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
+				    image, rw_image))
+			return -EINVAL;
+
+		/* fentry prog stored return value into [rbp - 8]. Emit:
+		 * if (*(u64 *)(rbp - 8) !=  0)
+		 *	*(u64 *)(rbp - session_off) |= (1 << (i + 1));
+		 */
+		/* cmp QWORD PTR [rbp - 0x8], 0x0 */
+		EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00);
+		/* emit 2 nops that will be replaced with JE insn */
+		jmp_insn = prog;
+		emit_nops(&prog, 2);
+
+		session_flags = (1ULL << (i + 1));
+		/* mov rax, $session_flags */
+		emit_mov_imm64(&prog, BPF_REG_0, session_flags >> 32, (u32) session_flags);
+		/* or QWORD PTR [rbp - session_off], rax */
+		EMIT2(0x48, 0x09);
+		emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -session_off);
+
+		jmp_insn[0] = X86_JE;
+		jmp_insn[1] = prog - jmp_insn - 2;
+	}
+
+	*pprog = prog;
+	return 0;
+}
+
+static int invoke_bpf_session_exit(const struct btf_func_model *m, u8 **pprog,
+				   struct bpf_tramp_links *tl, int stack_size,
+				   int run_ctx_off, int session_off,
+				   void *image, void *rw_image)
+{
+	u64 session_flags;
+	u8 *prog = *pprog;
+	u8 *jmp_insn;
+	int i;
+
+	/* set the bpf_trace_is_exit flag to the session flags */
+	/* mov rax, 1 */
+	emit_mov_imm32(&prog, false, BPF_REG_0, 1);
+	/* or QWORD PTR [rbp - session_off], rax */
+	EMIT2(0x48, 0x09);
+	emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -session_off);
+
+	for (i = 0; i < tl->nr_links; i++) {
+		/* check if (1 << (i+1)) is set in the session flags, and
+		 * skip the execution of the fexit program if it is.
+		 */
+		session_flags = 1ULL << (i + 1);
+		/* mov rax, $session_flags */
+		emit_mov_imm64(&prog, BPF_REG_1, session_flags >> 32, (u32) session_flags);
+		/* test QWORD PTR [rbp - session_off], rax */
+		EMIT2(0x48, 0x85);
+		emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_1, -session_off);
+		/* emit 2 nops that will be replaced with JE insn */
+		jmp_insn = prog;
+		emit_nops(&prog, 2);
+
+		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, false,
+				    image, rw_image))
+			return -EINVAL;
+
+		jmp_insn[0] = X86_JNE;
+		jmp_insn[1] = prog - jmp_insn - 2;
+	}
+
+	*pprog = prog;
+	return 0;
+}
+
 /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
 #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)	\
 	__LOAD_TCC_PTR(-round_up(stack, 8) - 8)
@@ -3179,8 +3270,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 					 void *func_addr)
 {
 	int i, ret, nr_regs = m->nr_args, stack_size = 0;
-	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
+	int regs_off, nregs_off, session_off, ip_off, run_ctx_off,
+	    arg_stack_off, rbx_off;
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+	struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
 	void *orig_call = func_addr;
@@ -3222,6 +3315,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	 *
 	 * RBP - nregs_off [ regs count	     ]  always
 	 *
+	 * RBP - session_off [ session flags ] tracing session
+	 *
 	 * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
 	 *
 	 * RBP - rbx_off   [ rbx value       ]  always
@@ -3246,6 +3341,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	/* regs count  */
 	stack_size += 8;
 	nregs_off = stack_size;
+	stack_size += 8;
+	session_off = stack_size;
 
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8; /* room for IP address argument */
@@ -3345,6 +3442,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 			return -EINVAL;
 	}
 
+	if (session->nr_links) {
+		if (invoke_bpf_session_entry(m, &prog, session, regs_off,
+					     run_ctx_off, session_off,
+					     image, rw_image))
+			return -EINVAL;
+	}
+
 	if (fmod_ret->nr_links) {
 		branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
 				   GFP_KERNEL);
@@ -3409,6 +3513,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		}
 	}
 
+	if (session->nr_links) {
+		if (invoke_bpf_session_exit(m, &prog, session, regs_off,
+					    run_ctx_off, session_off,
+					    image, rw_image)) {
+			ret = -EINVAL;
+			goto cleanup;
+		}
+	}
+
 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
 		restore_regs(m, &prog, regs_off);
 
-- 
2.51.0
Re: [PATCH RFC bpf-next 3/5] bpf,x86: add tracing session supporting for x86_64
Posted by Alexei Starovoitov 3 months, 2 weeks ago
On Sat, Oct 18, 2025 at 7:21 AM Menglong Dong <menglong8.dong@gmail.com> wrote:
>  /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
>  #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)    \
>         __LOAD_TCC_PTR(-round_up(stack, 8) - 8)
> @@ -3179,8 +3270,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>                                          void *func_addr)
>  {
>         int i, ret, nr_regs = m->nr_args, stack_size = 0;
> -       int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> +       int regs_off, nregs_off, session_off, ip_off, run_ctx_off,
> +           arg_stack_off, rbx_off;
>         struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> +       struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
>         struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
>         struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
>         void *orig_call = func_addr;
> @@ -3222,6 +3315,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>          *
>          * RBP - nregs_off [ regs count      ]  always
>          *
> +        * RBP - session_off [ session flags ] tracing session
> +        *
>          * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
>          *
>          * RBP - rbx_off   [ rbx value       ]  always
> @@ -3246,6 +3341,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>         /* regs count  */
>         stack_size += 8;
>         nregs_off = stack_size;
> +       stack_size += 8;
> +       session_off = stack_size;

Unconditional stack increase? :(
Re: [PATCH RFC bpf-next 3/5] bpf,x86: add tracing session supporting for x86_64
Posted by Menglong Dong 3 months, 2 weeks ago
On Wed, Oct 22, 2025 at 2:17 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Sat, Oct 18, 2025 at 7:21 AM Menglong Dong <menglong8.dong@gmail.com> wrote:
> >  /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
> >  #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)    \
> >         __LOAD_TCC_PTR(-round_up(stack, 8) - 8)
> > @@ -3179,8 +3270,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >                                          void *func_addr)
> >  {
> >         int i, ret, nr_regs = m->nr_args, stack_size = 0;
> > -       int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> > +       int regs_off, nregs_off, session_off, ip_off, run_ctx_off,
> > +           arg_stack_off, rbx_off;
> >         struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> > +       struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
> >         struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> >         struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> >         void *orig_call = func_addr;
> > @@ -3222,6 +3315,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >          *
> >          * RBP - nregs_off [ regs count      ]  always
> >          *
> > +        * RBP - session_off [ session flags ] tracing session
> > +        *
> >          * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
> >          *
> >          * RBP - rbx_off   [ rbx value       ]  always
> > @@ -3246,6 +3341,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >         /* regs count  */
> >         stack_size += 8;
> >         nregs_off = stack_size;
> > +       stack_size += 8;
> > +       session_off = stack_size;
>
> Unconditional stack increase? :(

Ah, it should be a conditional increase and I made a mistake here,
which will be fixed in the V2.

In fact, we can't add the session stuff here. Once we make it a
conditional increase, we can't tell the location of "ip" in
bpf_get_func_ip() anymore, as we can't tell whether the session stuff
exists in bpf_get_func_ip().

Several solutions that I came up with:

1. Reuse the nregs_off. It's 8 bytes, but 1 byte is enough for it.
Therefore, we can store some metadata flags in the high 7 bytes
of it, such as "SESSION_EXIST" or "IP_OFFSET". And then,
we can get the offset of the ip in bpf_get_func_ip().
It works, but it will make the code more confusing.

2. Introduce a bpf_tramp_session_run_ctx:
struct bpf_tramp_session_run_ctx {
  struct bpf_tramp_run_ctx;
  __u64 session_flags;
  __u64 session_cookie;
}
If the session exists, use the bpf_tramp_session_run_ctx in the
trampoline.
It works and is simple.

3. Add the session stuff to the tail of the context, which means
after the "return value". And the stack will become this:
session cookie -> 8-bytes if session
session flags   -> 8-bytes if session
return value     -> 8-bytes
argN
.....
arg1

Both method 2 and method 3 work and are simple, and I decided to use
method 3 in the V2.

Thanks!
Menglong Dong
Re: [PATCH RFC bpf-next 3/5] bpf,x86: add tracing session supporting for x86_64
Posted by Jiri Olsa 3 months, 2 weeks ago
On Sat, Oct 18, 2025 at 10:21:22PM +0800, Menglong Dong wrote:

SNIP

>  /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
>  #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)	\
>  	__LOAD_TCC_PTR(-round_up(stack, 8) - 8)
> @@ -3179,8 +3270,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  					 void *func_addr)
>  {
>  	int i, ret, nr_regs = m->nr_args, stack_size = 0;
> -	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> +	int regs_off, nregs_off, session_off, ip_off, run_ctx_off,
> +	    arg_stack_off, rbx_off;
>  	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> +	struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
>  	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
>  	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
>  	void *orig_call = func_addr;
> @@ -3222,6 +3315,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  	 *
>  	 * RBP - nregs_off [ regs count	     ]  always
>  	 *
> +	 * RBP - session_off [ session flags ] tracing session
> +	 *
>  	 * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
>  	 *
>  	 * RBP - rbx_off   [ rbx value       ]  always
> @@ -3246,6 +3341,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  	/* regs count  */
>  	stack_size += 8;
>  	nregs_off = stack_size;
> +	stack_size += 8;
> +	session_off = stack_size;

should this depend on session->nr_links ?

jirka

>  
>  	if (flags & BPF_TRAMP_F_IP_ARG)
>  		stack_size += 8; /* room for IP address argument */
> @@ -3345,6 +3442,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  			return -EINVAL;
>  	}
>  
> +	if (session->nr_links) {
> +		if (invoke_bpf_session_entry(m, &prog, session, regs_off,
> +					     run_ctx_off, session_off,
> +					     image, rw_image))
> +			return -EINVAL;
> +	}
> +
>  	if (fmod_ret->nr_links) {
>  		branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
>  				   GFP_KERNEL);
> @@ -3409,6 +3513,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  		}
>  	}
>  
> +	if (session->nr_links) {
> +		if (invoke_bpf_session_exit(m, &prog, session, regs_off,
> +					    run_ctx_off, session_off,
> +					    image, rw_image)) {
> +			ret = -EINVAL;
> +			goto cleanup;
> +		}
> +	}
> +
>  	if (flags & BPF_TRAMP_F_RESTORE_REGS)
>  		restore_regs(m, &prog, regs_off);
>  
> -- 
> 2.51.0
>
Re: [PATCH RFC bpf-next 3/5] bpf,x86: add tracing session supporting for x86_64
Posted by Menglong Dong 3 months, 2 weeks ago
On 2025/10/20 16:19, Jiri Olsa wrote:
> On Sat, Oct 18, 2025 at 10:21:22PM +0800, Menglong Dong wrote:
> 
> SNIP
> 
> >  /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
> >  #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)	\
> >  	__LOAD_TCC_PTR(-round_up(stack, 8) - 8)
> > @@ -3179,8 +3270,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >  					 void *func_addr)
> >  {
> >  	int i, ret, nr_regs = m->nr_args, stack_size = 0;
> > -	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> > +	int regs_off, nregs_off, session_off, ip_off, run_ctx_off,
> > +	    arg_stack_off, rbx_off;
> >  	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> > +	struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
> >  	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> >  	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> >  	void *orig_call = func_addr;
> > @@ -3222,6 +3315,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >  	 *
> >  	 * RBP - nregs_off [ regs count	     ]  always
> >  	 *
> > +	 * RBP - session_off [ session flags ] tracing session
> > +	 *
> >  	 * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
> >  	 *
> >  	 * RBP - rbx_off   [ rbx value       ]  always
> > @@ -3246,6 +3341,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >  	/* regs count  */
> >  	stack_size += 8;
> >  	nregs_off = stack_size;
> > +	stack_size += 8;
> > +	session_off = stack_size;
> 
> should this depend on session->nr_links ?

Hmm...my mistake, it should. And this also breaks bpf_get_func_ip(),
which I'll fix in the next version.

> 
> jirka
> 
> >  
> >  	if (flags & BPF_TRAMP_F_IP_ARG)
> >  		stack_size += 8; /* room for IP address argument */
> > @@ -3345,6 +3442,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >  			return -EINVAL;
> >  	}
> >  
> > +	if (session->nr_links) {
> > +		if (invoke_bpf_session_entry(m, &prog, session, regs_off,
> > +					     run_ctx_off, session_off,
> > +					     image, rw_image))
> > +			return -EINVAL;
> > +	}
> > +
> >  	if (fmod_ret->nr_links) {
> >  		branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
> >  				   GFP_KERNEL);
> > @@ -3409,6 +3513,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >  		}
> >  	}
> >  
> > +	if (session->nr_links) {
> > +		if (invoke_bpf_session_exit(m, &prog, session, regs_off,
> > +					    run_ctx_off, session_off,
> > +					    image, rw_image)) {
> > +			ret = -EINVAL;
> > +			goto cleanup;
> > +		}
> > +	}
> > +
> >  	if (flags & BPF_TRAMP_F_RESTORE_REGS)
> >  		restore_regs(m, &prog, regs_off);
> >  
> 
>
Re: [PATCH RFC bpf-next 3/5] bpf,x86: add tracing session supporting for x86_64
Posted by Menglong Dong 3 months, 3 weeks ago
On 2025/10/18 22:21, Menglong Dong wrote:
> Add BPF_TRACE_SESSION supporting to x86_64. invoke_bpf_session_entry and
> invoke_bpf_session_exit is introduced for this purpose.
> 
> In invoke_bpf_session_entry(), we will check if the return value of the
> fentry is 0, and clear the corresponding flag if not. And in
> invoke_bpf_session_exit(), we will check if the corresponding flag is
> set. If not set, the fexit will be skipped.
> 
> Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
> Co-developed-by: Leon Hwang <leon.hwang@linux.dev>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>  arch/x86/net/bpf_jit_comp.c | 115 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 114 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index d4c93d9e73e4..0586b96ed529 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -3108,6 +3108,97 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
>  	return 0;
>  }
>  
> +static int invoke_bpf_session_entry(const struct btf_func_model *m, u8 **pprog,
> +				    struct bpf_tramp_links *tl, int stack_size,
> +				    int run_ctx_off, int session_off,
> +				    void *image, void *rw_image)
> +{
> +	u64 session_flags;
> +	u8 *prog = *pprog;
> +	u8 *jmp_insn;
> +	int i;
> +
> +	/* clear the session flags:
> +	 *
> +	 *   xor rax, rax
> +	 *   mov QWORD PTR [rbp - session_off], rax
> +	 */
> +	EMIT3(0x48, 0x31, 0xC0);
> +	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -session_off);
> +
> +	for (i = 0; i < tl->nr_links; i++) {
> +		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
> +				    image, rw_image))
> +			return -EINVAL;
> +
> +		/* fentry prog stored return value into [rbp - 8]. Emit:
> +		 * if (*(u64 *)(rbp - 8) !=  0)
> +		 *	*(u64 *)(rbp - session_off) |= (1 << (i + 1));
> +		 */
> +		/* cmp QWORD PTR [rbp - 0x8], 0x0 */
> +		EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00);
> +		/* emit 2 nops that will be replaced with JE insn */
> +		jmp_insn = prog;
> +		emit_nops(&prog, 2);
> +
> +		session_flags = (1ULL << (i + 1));
> +		/* mov rax, $session_flags */
> +		emit_mov_imm64(&prog, BPF_REG_0, session_flags >> 32, (u32) session_flags);
> +		/* or QWORD PTR [rbp - session_off], rax */
> +		EMIT2(0x48, 0x09);
> +		emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -session_off);
> +
> +		jmp_insn[0] = X86_JE;
> +		jmp_insn[1] = prog - jmp_insn - 2;
> +	}
> +
> +	*pprog = prog;
> +	return 0;
> +}
> +
> +static int invoke_bpf_session_exit(const struct btf_func_model *m, u8 **pprog,
> +				   struct bpf_tramp_links *tl, int stack_size,
> +				   int run_ctx_off, int session_off,
> +				   void *image, void *rw_image)
> +{
> +	u64 session_flags;
> +	u8 *prog = *pprog;
> +	u8 *jmp_insn;
> +	int i;
> +
> +	/* set the bpf_trace_is_exit flag to the session flags */
> +	/* mov rax, 1 */
> +	emit_mov_imm32(&prog, false, BPF_REG_0, 1);
> +	/* or QWORD PTR [rbp - session_off], rax */
> +	EMIT2(0x48, 0x09);
> +	emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_0, -session_off);
> +
> +	for (i = 0; i < tl->nr_links; i++) {
> +		/* check if (1 << (i+1)) is set in the session flags, and
> +		 * skip the execution of the fexit program if it is.
> +		 */
> +		session_flags = 1ULL << (i + 1);
> +		/* mov rax, $session_flags */
> +		emit_mov_imm64(&prog, BPF_REG_1, session_flags >> 32, (u32) session_flags);
> +		/* test QWORD PTR [rbp - session_off], rax */
> +		EMIT2(0x48, 0x85);
> +		emit_insn_suffix(&prog, BPF_REG_FP, BPF_REG_1, -session_off);
> +		/* emit 2 nops that will be replaced with JE insn */
> +		jmp_insn = prog;
> +		emit_nops(&prog, 2);
> +
> +		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, false,
> +				    image, rw_image))
> +			return -EINVAL;
> +
> +		jmp_insn[0] = X86_JNE;
> +		jmp_insn[1] = prog - jmp_insn - 2;
> +	}
> +
> +	*pprog = prog;
> +	return 0;
> +}
> +
>  /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
>  #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)	\
>  	__LOAD_TCC_PTR(-round_up(stack, 8) - 8)
> @@ -3179,8 +3270,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  					 void *func_addr)
>  {
>  	int i, ret, nr_regs = m->nr_args, stack_size = 0;
> -	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> +	int regs_off, nregs_off, session_off, ip_off, run_ctx_off,
> +	    arg_stack_off, rbx_off;
>  	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> +	struct bpf_tramp_links *session = &tlinks[BPF_TRAMP_SESSION];
>  	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
>  	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
>  	void *orig_call = func_addr;
> @@ -3222,6 +3315,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  	 *
>  	 * RBP - nregs_off [ regs count	     ]  always
>  	 *
> +	 * RBP - session_off [ session flags ] tracing session
> +	 *
>  	 * RBP - ip_off    [ traced function ]  BPF_TRAMP_F_IP_ARG flag
>  	 *
>  	 * RBP - rbx_off   [ rbx value       ]  always
> @@ -3246,6 +3341,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  	/* regs count  */
>  	stack_size += 8;
>  	nregs_off = stack_size;
> +	stack_size += 8;
> +	session_off = stack_size;

Oops, this breaks bpf_get_func_ip(), which gets the ip with ctx[-2].
I'll introduce a "bpf_get_func_ip_proto_tracing_session" to fix it.

>  
>  	if (flags & BPF_TRAMP_F_IP_ARG)
>  		stack_size += 8; /* room for IP address argument */
> @@ -3345,6 +3442,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  			return -EINVAL;
>  	}
>  
> +	if (session->nr_links) {
> +		if (invoke_bpf_session_entry(m, &prog, session, regs_off,
> +					     run_ctx_off, session_off,
> +					     image, rw_image))
> +			return -EINVAL;
> +	}
> +
>  	if (fmod_ret->nr_links) {
>  		branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
>  				   GFP_KERNEL);
> @@ -3409,6 +3513,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>  		}
>  	}
>  
> +	if (session->nr_links) {
> +		if (invoke_bpf_session_exit(m, &prog, session, regs_off,
> +					    run_ctx_off, session_off,
> +					    image, rw_image)) {
> +			ret = -EINVAL;
> +			goto cleanup;
> +		}
> +	}
> +
>  	if (flags & BPF_TRAMP_F_RESTORE_REGS)
>  		restore_regs(m, &prog, regs_off);
>  
>