Add BPF_TRACE_SESSION support to x86_64, including:
1. clear the return value in the stack before fentry, so that the fentry
of the fsession can only get 0 from bpf_get_func_ret(). If we can ensure
that bpf_get_func_ret() is only used in the
"bpf_fsession_is_return() == true" code path, we don't need to do this
anymore.
2. clear all the session cookies' values in the stack. If we can make sure
in the verifier that a session cookie is only read after it has been
initialized, we don't need this anymore.
3. store the index of the cookie to ctx[-1] before calling the fsession
4. store the "is_return" flag to ctx[-1] before calling the fexit of
the fsession.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Co-developed-by: Leon Hwang <leon.hwang@linux.dev>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
v4:
- some adjustments to the 1st patch, such as getting the fsession prog from
the fentry and fexit hlists
- remove the support for skipping fexit when fentry returns non-zero
v2:
- add session cookie support
- add the session stuff after return value, instead of before nr_args
---
arch/x86/net/bpf_jit_comp.c | 36 +++++++++++++++++++++++++++++++-----
1 file changed, 31 insertions(+), 5 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8cbeefb26192..99b0223374bd 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3086,12 +3086,17 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
int run_ctx_off, bool save_ret,
- void *image, void *rw_image)
+ void *image, void *rw_image, u64 nr_regs)
{
int i;
u8 *prog = *pprog;
for (i = 0; i < tl->nr_links; i++) {
+ if (tl->links[i]->link.prog->call_session_cookie) {
+ /* 'stack_size + 8' is the offset of nr_regs in stack */
+ emit_st_r0_imm64(&prog, nr_regs, stack_size + 8);
+ nr_regs -= (1 << BPF_TRAMP_M_COOKIE);
+ }
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
run_ctx_off, save_ret, image, rw_image))
return -EINVAL;
@@ -3208,8 +3213,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
struct bpf_tramp_links *tlinks,
void *func_addr)
{
- int i, ret, nr_regs = m->nr_args, stack_size = 0;
- int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
+ int i, ret, nr_regs = m->nr_args, cookie_cnt, stack_size = 0;
+ int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off,
+ cookie_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -3282,6 +3288,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
ip_off = stack_size;
+ cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ /* room for session cookies */
+ stack_size += cookie_cnt * 8;
+ cookie_off = stack_size;
+
stack_size += 8;
rbx_off = stack_size;
@@ -3372,9 +3383,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
+ if (bpf_fsession_cnt(tlinks)) {
+ /* clear all the session cookies' value */
+ for (int i = 0; i < cookie_cnt; i++)
+ emit_st_r0_imm64(&prog, 0, cookie_off - 8 * i);
+ /* clear the return value to make sure fentry always get 0 */
+ emit_st_r0_imm64(&prog, 0, 8);
+ nr_regs += (((cookie_off - regs_off) / 8) << BPF_TRAMP_M_COOKIE);
+ }
+
if (fentry->nr_links) {
if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
- flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
+ flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image,
+ nr_regs))
return -EINVAL;
}
@@ -3434,9 +3455,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
+ /* set the "is_return" flag for fsession */
+ nr_regs += (1 << BPF_TRAMP_M_IS_RETURN);
+ if (bpf_fsession_cnt(tlinks))
+ emit_st_r0_imm64(&prog, nr_regs, nregs_off);
+
if (fexit->nr_links) {
if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
- false, image, rw_image)) {
+ false, image, rw_image, nr_regs)) {
ret = -EINVAL;
goto cleanup;
}
--
2.52.0
On Wed, Dec 17, 2025 at 1:55 AM Menglong Dong <menglong8.dong@gmail.com> wrote:
>
> Add BPF_TRACE_SESSION supporting to x86_64, including:
>
> 1. clear the return value in the stack before fentry to make the fentry
> of the fsession can only get 0 with bpf_get_func_ret(). If we can limit
> that bpf_get_func_ret() can only be used in the
> "bpf_fsession_is_return() == true" code path, we don't need do this
> thing anymore.
What does bpf_get_func_ret() return today for fentry? Zero, or just
random garbage? If the latter, we can keep the same semantics for
fsession on entry. Ultimately, the result of bpf_get_func_ret() is
meaningless outside of fexit/session-exit.
>
> 2. clear all the session cookies' value in the stack. If we can make sure
> that the reading to session cookie can only be done after initialize in
> the verifier, we don't need this anymore.
>
> 2. store the index of the cookie to ctx[-1] before the calling to fsession
>
> 3. store the "is_return" flag to ctx[-1] before the calling to fexit of
> the fsession.
>
> Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
> Co-developed-by: Leon Hwang <leon.hwang@linux.dev>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
> v4:
> - some adjustment to the 1st patch, such as we get the fsession prog from
> fentry and fexit hlist
> - remove the supporting of skipping fexit with fentry return non-zero
>
> v2:
> - add session cookie support
> - add the session stuff after return value, instead of before nr_args
> ---
> arch/x86/net/bpf_jit_comp.c | 36 +++++++++++++++++++++++++++++++-----
> 1 file changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 8cbeefb26192..99b0223374bd 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -3086,12 +3086,17 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
> static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
> struct bpf_tramp_links *tl, int stack_size,
> int run_ctx_off, bool save_ret,
> - void *image, void *rw_image)
> + void *image, void *rw_image, u64 nr_regs)
> {
> int i;
> u8 *prog = *pprog;
>
> for (i = 0; i < tl->nr_links; i++) {
> + if (tl->links[i]->link.prog->call_session_cookie) {
> + /* 'stack_size + 8' is the offset of nr_regs in stack */
> + emit_st_r0_imm64(&prog, nr_regs, stack_size + 8);
> + nr_regs -= (1 << BPF_TRAMP_M_COOKIE);
You have to rename nr_regs to something more meaningful, because it's
weird to see bit manipulations on a *number of arguments*.
> + }
> if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
> run_ctx_off, save_ret, image, rw_image))
> return -EINVAL;
> @@ -3208,8 +3213,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> struct bpf_tramp_links *tlinks,
> void *func_addr)
> {
> - int i, ret, nr_regs = m->nr_args, stack_size = 0;
> - int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
> + int i, ret, nr_regs = m->nr_args, cookie_cnt, stack_size = 0;
> + int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off,
> + cookie_off;
If it doesn't fit on a single line, just put `int cookie_off;` on a
separate line. Why wrap the line?
> struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
[...]
© 2016 - 2026 Red Hat, Inc.