[v9] bpf: fsession support

[PATCH bpf-next v9 05/11] bpf: support fsession for bpf_session_cookie

Posted by Menglong Dong 4 weeks, 1 day ago

Implement session cookie support for fsession. To limit stack usage, the
number of session cookies is capped at 4.

The offset of the current cookie is stored in
"(ctx[-1] >> BPF_TRAMP_M_COOKIE) & 0xFF". Here ctx is treated as a u64
pointer, so the offset is counted in 8-byte slots, and the session cookie
can be found at ctx[-offset].

The stack will look like this:

  return value	-> 8 bytes
  argN		-> 8 bytes
  ...
  arg1		-> 8 bytes
  nr_args	-> 8 bytes
  ip (optional)	-> 8 bytes
  cookie2	-> 8 bytes
  cookie1	-> 8 bytes

Implement and inline the bpf_session_cookie() for the fsession in the
verifier.

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v9:
- remove the definition of bpf_fsession_cookie()

v7:
- reuse bpf_session_cookie() instead of introducing a new kfunc

v5:
- remove "cookie_cnt" in struct bpf_trampoline

v4:
- limit the maximum of the cookie count to 4
- store the session cookies before nr_regs on the stack
---
 include/linux/bpf.h     | 15 +++++++++++++++
 kernel/bpf/trampoline.c | 13 +++++++++++--
 kernel/bpf/verifier.c   | 22 +++++++++++++++++++++-
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2640ec2157e1..a416050e0dd2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1231,6 +1231,7 @@ enum {
 
 #define BPF_TRAMP_M_NR_ARGS	0
 #define BPF_TRAMP_M_IS_RETURN	8
+#define BPF_TRAMP_M_COOKIE	9
 
 struct bpf_tramp_links {
 	struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
@@ -1783,6 +1784,7 @@ struct bpf_prog {
 				enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
 				call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
 				call_get_func_ip:1, /* Do we call get_func_ip() */
+				call_session_cookie:1, /* Do we call bpf_session_cookie() */
 				tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
 				sleepable:1;	/* BPF program is sleepable */
 	enum bpf_prog_type	type;		/* Type of BPF program */
@@ -2191,6 +2193,19 @@ static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
 	return cnt;
 }
 
+static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links)
+{
+	struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+	int cnt = 0;
+
+	for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
+		if (fentries.links[i]->link.prog->call_session_cookie)
+			cnt++;
+	}
+
+	return cnt;
+}
+
 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
 			       const struct bpf_ctx_arg_aux *info, u32 cnt);
 
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 11e043049d68..29b4e00d860c 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -592,6 +592,8 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
 	return 0;
 }
 
+#define BPF_TRAMP_MAX_COOKIES 4
+
 static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
 				      struct bpf_trampoline *tr,
 				      struct bpf_prog *tgt_prog)
@@ -600,7 +602,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
 	struct bpf_tramp_link *link_exiting;
 	struct bpf_fsession_link *fslink;
 	struct hlist_head *prog_list;
-	int err = 0;
+	int err = 0, cookie_cnt = 0;
 	int cnt = 0, i;
 
 	kind = bpf_attach_type_to_tramp(link->link.prog);
@@ -637,11 +639,18 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
 		/* prog already linked */
 		return -EBUSY;
 	hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) {
-		if (link_exiting->link.prog != link->link.prog)
+		if (link_exiting->link.prog != link->link.prog) {
+			if (kind == BPF_TRAMP_FSESSION &&
+			    link_exiting->link.prog->call_session_cookie)
+				cookie_cnt++;
 			continue;
+		}
 		/* prog already linked */
 		return -EBUSY;
 	}
+	if (link->link.prog->call_session_cookie &&
+	    cookie_cnt >= BPF_TRAMP_MAX_COOKIES)
+		return -E2BIG;
 
 	hlist_add_head(&link->tramp_hlist, prog_list);
 	if (kind == BPF_TRAMP_FSESSION) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1b0292a03186..b91fd8af2393 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12508,7 +12508,8 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 	bool arg_mem_size = false;
 
 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
-	    meta->func_id == special_kfunc_list[KF_bpf_session_is_return])
+	    meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
+	    meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
 		return KF_ARG_PTR_TO_CTX;
 
 	if (argno + 1 < nargs &&
@@ -14294,6 +14295,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			return err;
 	}
 
+	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
+		env->prog->call_session_cookie = true;
+
 	return 0;
 }
 
@@ -22571,6 +22575,22 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_IS_RETURN);
 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
 		*cnt = 3;
+	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
+		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+		/* inline bpf_session_cookie() for fsession:
+		 *   __u64 *bpf_session_cookie(void *ctx)
+		 *   {
+		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_M_COOKIE) & 0xFF;
+		 *       return &((u64 *)ctx)[-off];
+		 *   }
+		 */
+		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_COOKIE);
+		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
+		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
+		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
+		*cnt = 6;
 	}
 
 	if (env->insn_aux_data[insn_idx].arg_prog) {
-- 
2.52.0

Re: [PATCH bpf-next v9 05/11] bpf: support fsession for bpf_session_cookie

Posted by Andrii Nakryiko 3 weeks, 5 days ago

On Sat, Jan 10, 2026 at 6:12 AM Menglong Dong <menglong8.dong@gmail.com> wrote:
>
> Implement session cookie for fsession. In order to limit the stack usage,
> we make 4 as the maximum of the cookie count.

This 4 is so random, tbh. Do we need to artificially limit it? Even if
all BPF_MAX_TRAMP_LINKS = 38 were using session cookies, it would be
304 bytes. Not insignificant, but also not world-ending and IMO so
unlikely that I wouldn't add extra limits at all.

>
> The offset of the current cookie is stored in the
> "(ctx[-1] >> BPF_TRAMP_M_COOKIE) & 0xFF". Therefore, we can get the
> session cookie with ctx[-offset].


ctx here is assumed u64 *, right? So offset is in 8-byte units? Can
you clarify please?

>
> The stack will look like this:
>
>   return value  -> 8 bytes
>   argN          -> 8 bytes
>   ...
>   arg1          -> 8 bytes
>   nr_args       -> 8 bytes
>   ip (optional) -> 8 bytes
>   cookie2       -> 8 bytes
>   cookie1       -> 8 bytes
>
> Implement and inline the bpf_session_cookie() for the fsession in the
> verifier.
>
> Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
> ---
> v9:
> - remove the definition of bpf_fsession_cookie()
>
> v7:
> - reuse bpf_session_cookie() instead of introduce new kfunc
>
> v5:
> - remove "cookie_cnt" in struct bpf_trampoline
>
> v4:
> - limit the maximum of the cookie count to 4
> - store the session cookies before nr_regs in stack
> ---
>  include/linux/bpf.h     | 15 +++++++++++++++
>  kernel/bpf/trampoline.c | 13 +++++++++++--
>  kernel/bpf/verifier.c   | 22 +++++++++++++++++++++-
>  3 files changed, 47 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 2640ec2157e1..a416050e0dd2 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1231,6 +1231,7 @@ enum {
>
>  #define BPF_TRAMP_M_NR_ARGS    0
>  #define BPF_TRAMP_M_IS_RETURN  8
> +#define BPF_TRAMP_M_COOKIE     9

This is not wrong, but certainly weird. Why not make IS_RETURN the
upper bit (63) and keep the cookie as a proper second byte?


(also I think all these should drop _M and have _SHIFT suffix)


>
>  struct bpf_tramp_links {
>         struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
> @@ -1783,6 +1784,7 @@ struct bpf_prog {
>                                 enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
>                                 call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
>                                 call_get_func_ip:1, /* Do we call get_func_ip() */
> +                               call_session_cookie:1, /* Do we call bpf_session_cookie() */
>                                 tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
>                                 sleepable:1;    /* BPF program is sleepable */
>         enum bpf_prog_type      type;           /* Type of BPF program */
> @@ -2191,6 +2193,19 @@ static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
>         return cnt;
>  }
>
> +static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links)
> +{
> +       struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
> +       int cnt = 0;
> +
> +       for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
> +               if (fentries.links[i]->link.prog->call_session_cookie)
> +                       cnt++;
> +       }
> +
> +       return cnt;
> +}
> +
>  int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
>                                const struct bpf_ctx_arg_aux *info, u32 cnt);
>
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 11e043049d68..29b4e00d860c 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -592,6 +592,8 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
>         return 0;
>  }
>
> +#define BPF_TRAMP_MAX_COOKIES 4
> +
>  static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
>                                       struct bpf_trampoline *tr,
>                                       struct bpf_prog *tgt_prog)
> @@ -600,7 +602,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
>         struct bpf_tramp_link *link_exiting;
>         struct bpf_fsession_link *fslink;
>         struct hlist_head *prog_list;
> -       int err = 0;
> +       int err = 0, cookie_cnt = 0;
>         int cnt = 0, i;
>
>         kind = bpf_attach_type_to_tramp(link->link.prog);
> @@ -637,11 +639,18 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
>                 /* prog already linked */
>                 return -EBUSY;
>         hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) {
> -               if (link_exiting->link.prog != link->link.prog)
> +               if (link_exiting->link.prog != link->link.prog) {
> +                       if (kind == BPF_TRAMP_FSESSION &&
> +                           link_exiting->link.prog->call_session_cookie)
> +                               cookie_cnt++;
>                         continue;
> +               }
>                 /* prog already linked */
>                 return -EBUSY;
>         }
> +       if (link->link.prog->call_session_cookie &&
> +           cookie_cnt >= BPF_TRAMP_MAX_COOKIES)
> +               return -E2BIG;
>
>         hlist_add_head(&link->tramp_hlist, prog_list);
>         if (kind == BPF_TRAMP_FSESSION) {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 1b0292a03186..b91fd8af2393 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -12508,7 +12508,8 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
>         bool arg_mem_size = false;
>
>         if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
> -           meta->func_id == special_kfunc_list[KF_bpf_session_is_return])
> +           meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
> +           meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
>                 return KF_ARG_PTR_TO_CTX;
>
>         if (argno + 1 < nargs &&
> @@ -14294,6 +14295,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                         return err;
>         }
>
> +       if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
> +               env->prog->call_session_cookie = true;
> +
>         return 0;
>  }
>
> @@ -22571,6 +22575,22 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                 insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_IS_RETURN);
>                 insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
>                 *cnt = 3;
> +       } else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
> +                  env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
> +               /* inline bpf_session_cookie() for fsession:
> +                *   __u64 *bpf_session_cookie(void *ctx)
> +                *   {
> +                *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_M_COOKIE) & 0xFF;
> +                *       return &((u64 *)ctx)[-off];
> +                *   }
> +                */
> +               insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
> +               insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_COOKIE);
> +               insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
> +               insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
> +               insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
> +               insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
> +               *cnt = 6;
>         }
>
>         if (env->insn_aux_data[insn_idx].arg_prog) {
> --
> 2.52.0
>

Re: [PATCH bpf-next v9 05/11] bpf: support fsession for bpf_session_cookie

Posted by Alexei Starovoitov 3 weeks, 5 days ago

On Tue, Jan 13, 2026 at 5:24 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Sat, Jan 10, 2026 at 6:12 AM Menglong Dong <menglong8.dong@gmail.com> wrote:
> >
> > Implement session cookie for fsession. In order to limit the stack usage,
> > we make 4 as the maximum of the cookie count.
>
> This 4 is so random, tbh. Do we need to artificially limit it? Even if
> all BPF_MAX_TRAMP_LINKS = 38 were using session cookies, it would be
> 304 bytes. Not insignificant, but also not world-ending and IMO so
> unlikely that I wouldn't add extra limits at all.

I forgot that we already have the BPF_MAX_TRAMP_LINKS limit on the total
number of progs. I guess an extra 8 bytes per fsession prog isn't that bad.

Re: [PATCH bpf-next v9 05/11] bpf: support fsession for bpf_session_cookie

Posted by bot+bpf-ci@kernel.org 4 weeks, 1 day ago

> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 2640ec2157e1..a416050e0dd2 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1231,6 +1231,7 @@ enum {
>
>  #define BPF_TRAMP_M_NR_ARGS	0
>  #define BPF_TRAMP_M_IS_RETURN	8
> +#define BPF_TRAMP_M_COOKIE	9

[ ... ]

> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 1b0292a03186..b91fd8af2393 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c

[ ... ]

> @@ -22571,6 +22575,22 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_IS_RETURN);
>  		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
>  		*cnt = 3;
> +	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
> +		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
> +		/* inline bpf_session_cookie() for fsession:
> +		 *   __u64 *bpf_session_cookie(void *ctx)
> +		 *   {
> +		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_M_COOKIE) & 0xFF;
> +		 *       return &((u64 *)ctx)[-off];
> +		 *   }
> +		 */
> +		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
> +		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_M_COOKIE);
> +		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
> +		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
> +		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
> +		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
                        ^^^^^^^^

This inline code reads the cookie offset from (ctx[-1] >> 9) & 0xFF, but
does the x86 trampoline actually store the cookie offset at bit position 9?

Looking at arch/x86/net/bpf_jit_comp.c in __arch_prepare_bpf_trampoline(),
the trampoline stores nr_regs to the nregs_off location:

    emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs);
    emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off);

This sets ctx[-1] to a small value like 0x0000000000000006, with the high
bits all zero. When the inline code executes:

    off = (ctx[-1] >> 9) & 0xFF  --> off = 0

The function then returns &ctx[-0] = ctx itself, which points to the
function arguments instead of the session cookie.

Is there a missing dependency on a later commit in the series? Commit
983968c39241 ("bpf,x86: add fsession support for x86_64") appears to add
the trampoline code that stores the cookie offset, but it comes after this
commit in the patch series.

> +		*cnt = 6;
> +	}


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20879693280