[v4] bpf: tracing session supporting

[PATCH bpf-next v4 6/9] bpf,x86: add tracing session supporting for x86_64

Posted by Menglong Dong 1 month, 3 weeks ago

Add BPF_TRACE_SESSION support to x86_64, including:

1. Clear the return value on the stack before calling fentry, so that the
   fentry of the fsession can only get 0 from bpf_get_func_ret(). If we
   can restrict bpf_get_func_ret() to the
   "bpf_fsession_is_return() == true" code path, we don't need to do this
   anymore.

2. Clear all the session cookies' values on the stack. If the verifier can
   make sure that a session cookie is only read after it has been
   initialized, we don't need this anymore.

3. Store the index of the cookie to ctx[-1] before calling the fsession.

4. Store the "is_return" flag to ctx[-1] before calling the fexit of
   the fsession.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Co-developed-by: Leon Hwang <leon.hwang@linux.dev>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
v4:
- some adjustments following the 1st patch, such as getting the fsession
  prog from the fentry and fexit hlists
- remove the support for skipping fexit when fentry returns non-zero

v2:
- add session cookie support
- add the session stuff after return value, instead of before nr_args
---
 arch/x86/net/bpf_jit_comp.c | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8cbeefb26192..99b0223374bd 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3086,12 +3086,17 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
 		      struct bpf_tramp_links *tl, int stack_size,
 		      int run_ctx_off, bool save_ret,
-		      void *image, void *rw_image)
+		      void *image, void *rw_image, u64 nr_regs)
 {
 	int i;
 	u8 *prog = *pprog;
 
 	for (i = 0; i < tl->nr_links; i++) {
+		if (tl->links[i]->link.prog->call_session_cookie) {
+			/* 'stack_size + 8' is the offset of nr_regs in stack */
+			emit_st_r0_imm64(&prog, nr_regs, stack_size + 8);
+			nr_regs -= (1 << BPF_TRAMP_M_COOKIE);
+		}
 		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
 				    run_ctx_off, save_ret, image, rw_image))
 			return -EINVAL;
@@ -3208,8 +3213,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 					 struct bpf_tramp_links *tlinks,
 					 void *func_addr)
 {
-	int i, ret, nr_regs = m->nr_args, stack_size = 0;
-	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
+	int i, ret, nr_regs = m->nr_args, cookie_cnt, stack_size = 0;
+	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off,
+	    cookie_off;
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -3282,6 +3288,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 
 	ip_off = stack_size;
 
+	cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+	/* room for session cookies */
+	stack_size += cookie_cnt * 8;
+	cookie_off = stack_size;
+
 	stack_size += 8;
 	rbx_off = stack_size;
 
@@ -3372,9 +3383,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		}
 	}
 
+	if (bpf_fsession_cnt(tlinks)) {
+		/* clear all the session cookies' value */
+		for (int i = 0; i < cookie_cnt; i++)
+			emit_st_r0_imm64(&prog, 0, cookie_off - 8 * i);
+		/* clear the return value to make sure fentry always get 0 */
+		emit_st_r0_imm64(&prog, 0, 8);
+		nr_regs += (((cookie_off - regs_off) / 8) << BPF_TRAMP_M_COOKIE);
+	}
+
 	if (fentry->nr_links) {
 		if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
-			       flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
+			       flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image,
+			       nr_regs))
 			return -EINVAL;
 	}
 
@@ -3434,9 +3455,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		}
 	}
 
+	/* set the "is_return" flag for fsession */
+	nr_regs += (1 << BPF_TRAMP_M_IS_RETURN);
+	if (bpf_fsession_cnt(tlinks))
+		emit_st_r0_imm64(&prog, nr_regs, nregs_off);
+
 	if (fexit->nr_links) {
 		if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
-			       false, image, rw_image)) {
+			       false, image, rw_image, nr_regs)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
-- 
2.52.0

Re: [PATCH bpf-next v4 6/9] bpf,x86: add tracing session supporting for x86_64

Posted by Andrii Nakryiko 1 month, 2 weeks ago

On Wed, Dec 17, 2025 at 1:55 AM Menglong Dong <menglong8.dong@gmail.com> wrote:
>
> Add BPF_TRACE_SESSION supporting to x86_64, including:
>
> 1. clear the return value in the stack before fentry to make the fentry
>    of the fsession can only get 0 with bpf_get_func_ret(). If we can limit
>    that bpf_get_func_ret() can only be used in the
>    "bpf_fsession_is_return() == true" code path, we don't need do this
>    thing anymore.

What does bpf_get_func_ret() return today for fentry? Zero, or just
random garbage? If the latter, we can keep the same semantics for
fsession on entry. Ultimately, the result of bpf_get_func_ret() is
meaningless outside of fexit/session-exit.

>
> 2. clear all the session cookies' value in the stack. If we can make sure
>    that the reading to session cookie can only be done after initialize in
>    the verifier, we don't need this anymore.
>
> 2. store the index of the cookie to ctx[-1] before the calling to fsession
>
> 3. store the "is_return" flag to ctx[-1] before the calling to fexit of
>    the fsession.
>
> Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
> Co-developed-by: Leon Hwang <leon.hwang@linux.dev>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
> v4:
> - some adjustment to the 1st patch, such as we get the fsession prog from
>   fentry and fexit hlist
> - remove the supporting of skipping fexit with fentry return non-zero
>
> v2:
> - add session cookie support
> - add the session stuff after return value, instead of before nr_args
> ---
>  arch/x86/net/bpf_jit_comp.c | 36 +++++++++++++++++++++++++++++++-----
>  1 file changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 8cbeefb26192..99b0223374bd 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -3086,12 +3086,17 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
>  static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
>                       struct bpf_tramp_links *tl, int stack_size,
>                       int run_ctx_off, bool save_ret,
> -                     void *image, void *rw_image)
> +                     void *image, void *rw_image, u64 nr_regs)
>  {
>         int i;
>         u8 *prog = *pprog;
>
>         for (i = 0; i < tl->nr_links; i++) {
> +               if (tl->links[i]->link.prog->call_session_cookie) {
> +                       /* 'stack_size + 8' is the offset of nr_regs in stack */
> +                       emit_st_r0_imm64(&prog, nr_regs, stack_size + 8);
> +                       nr_regs -= (1 << BPF_TRAMP_M_COOKIE);

You have to rename nr_regs to something more meaningful, because it's
so weird to see bit manipulations on a *number of arguments*.

> +               }
>                 if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
>                                     run_ctx_off, save_ret, image, rw_image))
>                         return -EINVAL;
> @@ -3208,8 +3213,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>                                          struct bpf_tramp_links *tlinks,
>                                          void *func_addr)
>  {
> -       int i, ret, nr_regs = m->nr_args, stack_size = 0;
> -       int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> +       int i, ret, nr_regs = m->nr_args, cookie_cnt, stack_size = 0;
> +       int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off,
> +           cookie_off;

If it doesn't fit on a single line, just put `int cookie_off;` on a
separate line. Why wrap the line?

>         struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
>         struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
>         struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];

[...]