The fsession is similar to the kprobe session. It allows attaching a
single BPF program to both the entry and the exit of the target
functions.
Introduce the struct bpf_fsession_link, which allows adding the link to
both the fentry and fexit progs_hlist of the trampoline.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Co-developed-by: Leon Hwang <leon.hwang@linux.dev>
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
v5:
- unify the name to "fsession"
- use a more explicit way in __bpf_trampoline_link_prog()
v4:
- instead of adding a new hlist to progs_hlist in trampoline, add the bpf
program to both the fentry hlist and the fexit hlist.
---
include/linux/bpf.h | 20 ++++++++++
include/uapi/linux/bpf.h | 1 +
kernel/bpf/btf.c | 2 +
kernel/bpf/syscall.c | 18 ++++++++-
kernel/bpf/trampoline.c | 40 ++++++++++++++++---
kernel/bpf/verifier.c | 12 ++++--
net/bpf/test_run.c | 1 +
net/core/bpf_sk_storage.c | 1 +
tools/include/uapi/linux/bpf.h | 1 +
.../bpf/prog_tests/tracing_failure.c | 2 +-
10 files changed, 88 insertions(+), 10 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 28d8d6b7bb1e..63e1bc29485e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1291,6 +1291,7 @@ enum bpf_tramp_prog_type {
BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
+ BPF_TRAMP_FSESSION,
};
struct bpf_tramp_image {
@@ -1854,6 +1855,11 @@ struct bpf_tracing_link {
struct bpf_prog *tgt_prog;
};
+struct bpf_fsession_link {
+ struct bpf_tracing_link link;
+ struct bpf_tramp_link fexit;
+};
+
struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp;
@@ -2114,6 +2120,20 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
+static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
+{
+ struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ int cnt = 0;
+
+ for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
+ if (fentries.links[i]->link.prog->expected_attach_type ==
+ BPF_TRACE_FSESSION)
+ cnt++;
+ }
+
+ return cnt;
+}
+
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
const struct bpf_ctx_arg_aux *info, u32 cnt);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 84ced3ed2d21..cd2d7c4fc6e7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1145,6 +1145,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_FSESSION,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0de8fc8a0e0b..dff3eae4b51e 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6107,6 +6107,7 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
+ case BPF_TRACE_FSESSION:
/* allow u64* as ctx */
if (btf_is_int(t) && t->size == 8)
return 0;
@@ -6704,6 +6705,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
fallthrough;
case BPF_LSM_CGROUP:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
* int LSM hooks, they use MODIFY_RETURN trampolines.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3080cc48bfc3..3bfaf550ad08 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3579,6 +3579,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
case BPF_PROG_TYPE_TRACING:
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT &&
+ prog->expected_attach_type != BPF_TRACE_FSESSION &&
prog->expected_attach_type != BPF_MODIFY_RETURN) {
err = -EINVAL;
goto out_put_prog;
@@ -3628,7 +3629,21 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
}
- link = kzalloc(sizeof(*link), GFP_USER);
+ if (prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ struct bpf_fsession_link *fslink;
+
+ fslink = kzalloc(sizeof(*fslink), GFP_USER);
+ if (fslink) {
+ bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING,
+ &bpf_tracing_link_lops, prog, attach_type);
+ fslink->fexit.cookie = bpf_cookie;
+ link = &fslink->link;
+ } else {
+ link = NULL;
+ }
+ } else {
+ link = kzalloc(sizeof(*link), GFP_USER);
+ }
if (!link) {
err = -ENOMEM;
goto out_put_prog;
@@ -4352,6 +4367,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_TRACE_RAW_TP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
case BPF_MODIFY_RETURN:
return BPF_PROG_TYPE_TRACING;
case BPF_LSM_MAC:
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 976d89011b15..77d474fc973a 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -111,7 +111,7 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
return (ptype == BPF_PROG_TYPE_TRACING &&
(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
- eatype == BPF_MODIFY_RETURN)) ||
+ eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION)) ||
(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}
@@ -559,6 +559,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT;
+ case BPF_TRACE_FSESSION:
+ return BPF_TRAMP_FSESSION;
case BPF_LSM_MAC:
if (!prog->aux->attach_func_proto->type)
/* The function returns void, we cannot modify its
@@ -596,6 +598,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
{
enum bpf_tramp_prog_type kind;
struct bpf_tramp_link *link_exiting;
+ struct bpf_fsession_link *fslink;
+ struct hlist_head *prog_list;
int err = 0;
int cnt = 0, i;
@@ -621,24 +625,44 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
BPF_MOD_JUMP, NULL,
link->link.prog->bpf_func);
}
+ if (kind == BPF_TRAMP_FSESSION) {
+ prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY];
+ cnt++;
+ } else {
+ prog_list = &tr->progs_hlist[kind];
+ }
if (cnt >= BPF_MAX_TRAMP_LINKS)
return -E2BIG;
if (!hlist_unhashed(&link->tramp_hlist))
/* prog already linked */
return -EBUSY;
- hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
+ hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) {
if (link_exiting->link.prog != link->link.prog)
continue;
/* prog already linked */
return -EBUSY;
}
- hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
- tr->progs_cnt[kind]++;
+ hlist_add_head(&link->tramp_hlist, prog_list);
+ if (kind == BPF_TRAMP_FSESSION) {
+ tr->progs_cnt[BPF_TRAMP_FENTRY]++;
+ fslink = container_of(link, struct bpf_fsession_link, link.link);
+ hlist_add_head(&fslink->fexit.tramp_hlist,
+ &tr->progs_hlist[BPF_TRAMP_FEXIT]);
+ tr->progs_cnt[BPF_TRAMP_FEXIT]++;
+ } else {
+ tr->progs_cnt[kind]++;
+ }
err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
if (err) {
hlist_del_init(&link->tramp_hlist);
- tr->progs_cnt[kind]--;
+ if (kind == BPF_TRAMP_FSESSION) {
+ tr->progs_cnt[BPF_TRAMP_FENTRY]--;
+ hlist_del_init(&fslink->fexit.tramp_hlist);
+ tr->progs_cnt[BPF_TRAMP_FEXIT]--;
+ } else {
+ tr->progs_cnt[kind]--;
+ }
}
return err;
}
@@ -659,6 +683,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
+ struct bpf_fsession_link *fslink;
enum bpf_tramp_prog_type kind;
int err;
@@ -672,6 +697,11 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
guard(mutex)(&tgt_prog->aux->ext_mutex);
tgt_prog->aux->is_extended = false;
return err;
+ } else if (kind == BPF_TRAMP_FSESSION) {
+ fslink = container_of(link, struct bpf_fsession_link, link.link);
+ hlist_del_init(&fslink->fexit.tramp_hlist);
+ tr->progs_cnt[BPF_TRAMP_FEXIT]--;
+ kind = BPF_TRAMP_FENTRY;
}
hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a31c032b2dd6..b9714a7c3c5f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17402,6 +17402,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
switch (env->prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
range = retval_range(0, 0);
break;
case BPF_TRACE_RAW_TP:
@@ -23298,6 +23299,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
if (prog_type == BPF_PROG_TYPE_TRACING &&
insn->imm == BPF_FUNC_get_func_ret) {
if (eatype == BPF_TRACE_FEXIT ||
+ eatype == BPF_TRACE_FSESSION ||
eatype == BPF_MODIFY_RETURN) {
/* Load nr_args from ctx - 8 */
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
@@ -24242,7 +24244,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
prog_extension &&
(tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
- tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
+ tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) {
/* Program extensions can extend all program types
* except fentry/fexit. The reason is the following.
* The fentry/fexit programs are used for performance
@@ -24257,7 +24260,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
* beyond reasonable stack size. Hence extending fentry
* is not allowed.
*/
- bpf_log(log, "Cannot extend fentry/fexit\n");
+ bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
return -EINVAL;
}
} else {
@@ -24341,6 +24344,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
case BPF_LSM_CGROUP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
if (!btf_type_is_func(t)) {
bpf_log(log, "attach_btf_id %u is not a function\n",
btf_id);
@@ -24507,6 +24511,7 @@ static bool can_be_sleepable(struct bpf_prog *prog)
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
case BPF_TRACE_ITER:
+ case BPF_TRACE_FSESSION:
return true;
default:
return false;
@@ -24588,9 +24593,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
tgt_info.tgt_name);
return -EINVAL;
} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ prog->expected_attach_type == BPF_TRACE_FSESSION ||
prog->expected_attach_type == BPF_MODIFY_RETURN) &&
btf_id_set_contains(&noreturn_deny, btf_id)) {
- verbose(env, "Attaching fexit/fmod_ret to __noreturn function '%s' is rejected.\n",
+ verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
tgt_info.tgt_name);
return -EINVAL;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 655efac6f133..3b0d9bd039de 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -685,6 +685,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
if (bpf_fentry_test1(1) != 2 ||
bpf_fentry_test2(2, 3) != 5 ||
bpf_fentry_test3(4, 5, 6) != 15 ||
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 850dd736ccd1..de111818f3a0 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -365,6 +365,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
return true;
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage",
strlen("bpf_sk_storage"));
default:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6b92b0847ec2..012abaf3d4ac 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1145,6 +1145,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_FSESSION,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
index 10e231965589..f9f9e1cb87bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -73,7 +73,7 @@ static void test_tracing_deny(void)
static void test_fexit_noreturns(void)
{
test_tracing_fail_prog("fexit_noreturns",
- "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.");
+ "Attaching fexit/fsession/fmod_ret to __noreturn function 'do_exit' is rejected.");
}
void test_tracing_failure(void)
--
2.52.0
On Wed, Dec 24, 2025 at 09:07:26PM +0800, Menglong Dong wrote:
SNIP
> +struct bpf_fsession_link {
> + struct bpf_tracing_link link;
> + struct bpf_tramp_link fexit;
> +};
> +
> struct bpf_raw_tp_link {
> struct bpf_link link;
> struct bpf_raw_event_map *btp;
> @@ -2114,6 +2120,20 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
>
> #endif
>
> +static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
> +{
> + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
> + int cnt = 0;
> +
> + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
> + if (fentries.links[i]->link.prog->expected_attach_type ==
> + BPF_TRACE_FSESSION)
let's keep it on a single line?
> + cnt++;
> + }
> +
> + return cnt;
> +}
> +
> int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
> const struct bpf_ctx_arg_aux *info, u32 cnt);
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 84ced3ed2d21..cd2d7c4fc6e7 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1145,6 +1145,7 @@ enum bpf_attach_type {
> BPF_NETKIT_PEER,
> BPF_TRACE_KPROBE_SESSION,
> BPF_TRACE_UPROBE_SESSION,
> + BPF_TRACE_FSESSION,
> __MAX_BPF_ATTACH_TYPE
> };
>
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 0de8fc8a0e0b..dff3eae4b51e 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -6107,6 +6107,7 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct
> case BPF_TRACE_FENTRY:
> case BPF_TRACE_FEXIT:
> case BPF_MODIFY_RETURN:
> + case BPF_TRACE_FSESSION:
> /* allow u64* as ctx */
> if (btf_is_int(t) && t->size == 8)
> return 0;
> @@ -6704,6 +6705,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
> fallthrough;
> case BPF_LSM_CGROUP:
> case BPF_TRACE_FEXIT:
> + case BPF_TRACE_FSESSION:
> /* When LSM programs are attached to void LSM hooks
> * they use FEXIT trampolines and when attached to
> * int LSM hooks, they use MODIFY_RETURN trampolines.
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 3080cc48bfc3..3bfaf550ad08 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3579,6 +3579,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
> case BPF_PROG_TYPE_TRACING:
> if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
> prog->expected_attach_type != BPF_TRACE_FEXIT &&
> + prog->expected_attach_type != BPF_TRACE_FSESSION &&
> prog->expected_attach_type != BPF_MODIFY_RETURN) {
> err = -EINVAL;
> goto out_put_prog;
> @@ -3628,7 +3629,21 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
> key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
> }
>
> - link = kzalloc(sizeof(*link), GFP_USER);
> + if (prog->expected_attach_type == BPF_TRACE_FSESSION) {
> + struct bpf_fsession_link *fslink;
> +
> + fslink = kzalloc(sizeof(*fslink), GFP_USER);
> + if (fslink) {
> + bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING,
> + &bpf_tracing_link_lops, prog, attach_type);
I don't think we need the extra exit struct bpf_link, we just need an
hlist_node hook for the exit program, so this should perhaps be:
struct bpf_fsession_link {
struct bpf_tracing_link link;
struct hlist_node tramp_hlist;
};
SNIP
> @@ -596,6 +598,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
> {
> enum bpf_tramp_prog_type kind;
> struct bpf_tramp_link *link_exiting;
> + struct bpf_fsession_link *fslink;
> + struct hlist_head *prog_list;
> int err = 0;
> int cnt = 0, i;
>
> @@ -621,24 +625,44 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
> BPF_MOD_JUMP, NULL,
> link->link.prog->bpf_func);
> }
> + if (kind == BPF_TRAMP_FSESSION) {
> + prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY];
> + cnt++;
> + } else {
> + prog_list = &tr->progs_hlist[kind];
> + }
> if (cnt >= BPF_MAX_TRAMP_LINKS)
> return -E2BIG;
> if (!hlist_unhashed(&link->tramp_hlist))
> /* prog already linked */
> return -EBUSY;
> - hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
> + hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) {
> if (link_exiting->link.prog != link->link.prog)
> continue;
> /* prog already linked */
> return -EBUSY;
> }
>
> - hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
> - tr->progs_cnt[kind]++;
> + hlist_add_head(&link->tramp_hlist, prog_list);
> + if (kind == BPF_TRAMP_FSESSION) {
> + tr->progs_cnt[BPF_TRAMP_FENTRY]++;
> + fslink = container_of(link, struct bpf_fsession_link, link.link);
> + hlist_add_head(&fslink->fexit.tramp_hlist,
> + &tr->progs_hlist[BPF_TRAMP_FEXIT]);
> + tr->progs_cnt[BPF_TRAMP_FEXIT]++;
> + } else {
> + tr->progs_cnt[kind]++;
> + }
> err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
> if (err) {
> hlist_del_init(&link->tramp_hlist);
> - tr->progs_cnt[kind]--;
> + if (kind == BPF_TRAMP_FSESSION) {
> + tr->progs_cnt[BPF_TRAMP_FENTRY]--;
> + hlist_del_init(&fslink->fexit.tramp_hlist);
> + tr->progs_cnt[BPF_TRAMP_FEXIT]--;
> + } else {
> + tr->progs_cnt[kind]--;
> + }
> }
> return err;
this seems confusing, how about we just abolish bpf_fsession_link
and add an extra hlist_node hook to struct bpf_tramp_link .. we will waste
16 bytes for other cases, but the code seems less confusing to me
untested, so I might have overlooked something..
jirka
---
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4e7d72dfbcd4..7479664844ea 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1309,6 +1309,7 @@ enum bpf_tramp_prog_type {
BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
+ BPF_TRAMP_FSESSION,
};
struct bpf_tramp_image {
@@ -1861,6 +1862,7 @@ struct bpf_link_ops {
struct bpf_tramp_link {
struct bpf_link link;
struct hlist_node tramp_hlist;
+ struct hlist_node extra_hlist;
u64 cookie;
};
@@ -2169,6 +2171,19 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
+static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
+{
+ struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ int cnt = 0;
+
+ for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
+ if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION)
+ cnt++;
+ }
+
+ return cnt;
+}
+
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
const struct bpf_ctx_arg_aux *info, u32 cnt);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 84ced3ed2d21..cd2d7c4fc6e7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1145,6 +1145,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_FSESSION,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 539c9fdea41d..8b1dcd440356 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6107,6 +6107,7 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
+ case BPF_TRACE_FSESSION:
/* allow u64* as ctx */
if (btf_is_int(t) && t->size == 8)
return 0;
@@ -6704,6 +6705,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
fallthrough;
case BPF_LSM_CGROUP:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
* int LSM hooks, they use MODIFY_RETURN trampolines.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a4d38272d8bc..d05f59bffa02 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3579,6 +3579,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
case BPF_PROG_TYPE_TRACING:
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT &&
+ prog->expected_attach_type != BPF_TRACE_FSESSION &&
prog->expected_attach_type != BPF_MODIFY_RETURN) {
err = -EINVAL;
goto out_put_prog;
@@ -4352,6 +4353,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_TRACE_RAW_TP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
case BPF_MODIFY_RETURN:
return BPF_PROG_TYPE_TRACING;
case BPF_LSM_MAC:
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 2a125d063e62..f27ed8b934f9 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -111,7 +111,7 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
return (ptype == BPF_PROG_TYPE_TRACING &&
(eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
- eatype == BPF_MODIFY_RETURN)) ||
+ eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION)) ||
(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}
@@ -559,6 +559,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
return BPF_TRAMP_FEXIT;
+ case BPF_TRACE_FSESSION:
+ return BPF_TRAMP_FSESSION;
case BPF_LSM_MAC:
if (!prog->aux->attach_func_proto->type)
/* The function returns void, we cannot modify its
@@ -621,6 +623,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
BPF_MOD_JUMP, NULL,
link->link.prog->bpf_func);
}
+ if (kind == BPF_TRAMP_FSESSION)
+ cnt++;
if (cnt >= BPF_MAX_TRAMP_LINKS)
return -E2BIG;
if (!hlist_unhashed(&link->tramp_hlist))
@@ -633,12 +637,27 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
return -EBUSY;
}
- hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
- tr->progs_cnt[kind]++;
+ if (kind == BPF_TRAMP_FSESSION) {
+ hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FENTRY]);
+ hlist_add_head(&link->extra_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
+ tr->progs_cnt[BPF_TRAMP_FENTRY]++;
+ tr->progs_cnt[BPF_TRAMP_FEXIT]++;
+ } else {
+ hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
+ tr->progs_cnt[kind]++;
+ }
+
err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
if (err) {
- hlist_del_init(&link->tramp_hlist);
- tr->progs_cnt[kind]--;
+ if (kind == BPF_TRAMP_FSESSION) {
+ hlist_del_init(&link->tramp_hlist);
+ hlist_del_init(&link->extra_hlist);
+ tr->progs_cnt[BPF_TRAMP_FENTRY]--;
+ tr->progs_cnt[BPF_TRAMP_FEXIT]--;
+ } else {
+ hlist_del_init(&link->tramp_hlist);
+ tr->progs_cnt[kind]--;
+ }
}
return err;
}
@@ -672,9 +691,15 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
guard(mutex)(&tgt_prog->aux->ext_mutex);
tgt_prog->aux->is_extended = false;
return err;
+ } else if (kind == BPF_TRAMP_FSESSION) {
+ hlist_del_init(&link->tramp_hlist);
+ hlist_del_init(&link->extra_hlist);
+ tr->progs_cnt[BPF_TRAMP_FENTRY]--;
+ tr->progs_cnt[BPF_TRAMP_FEXIT]--;
+ } else {
+ hlist_del_init(&link->tramp_hlist);
+ tr->progs_cnt[kind]--;
}
- hlist_del_init(&link->tramp_hlist);
- tr->progs_cnt[kind]--;
return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2de1a736ef69..6146f63cb03a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17406,6 +17406,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
switch (env->prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
range = retval_range(0, 0);
break;
case BPF_TRACE_RAW_TP:
@@ -23303,6 +23304,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
if (prog_type == BPF_PROG_TYPE_TRACING &&
insn->imm == BPF_FUNC_get_func_ret) {
if (eatype == BPF_TRACE_FEXIT ||
+ eatype == BPF_TRACE_FSESSION ||
eatype == BPF_MODIFY_RETURN) {
/* Load nr_args from ctx - 8 */
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
@@ -24247,7 +24249,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
prog_extension &&
(tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
- tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
+ tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) {
/* Program extensions can extend all program types
* except fentry/fexit. The reason is the following.
* The fentry/fexit programs are used for performance
@@ -24262,7 +24265,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
* beyond reasonable stack size. Hence extending fentry
* is not allowed.
*/
- bpf_log(log, "Cannot extend fentry/fexit\n");
+ bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
return -EINVAL;
}
} else {
@@ -24346,6 +24349,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
case BPF_LSM_CGROUP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
if (!btf_type_is_func(t)) {
bpf_log(log, "attach_btf_id %u is not a function\n",
btf_id);
@@ -24512,6 +24516,7 @@ static bool can_be_sleepable(struct bpf_prog *prog)
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
case BPF_TRACE_ITER:
+ case BPF_TRACE_FSESSION:
return true;
default:
return false;
@@ -24593,9 +24598,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
tgt_info.tgt_name);
return -EINVAL;
} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ prog->expected_attach_type == BPF_TRACE_FSESSION ||
prog->expected_attach_type == BPF_MODIFY_RETURN) &&
btf_id_set_contains(&noreturn_deny, btf_id)) {
- verbose(env, "Attaching fexit/fmod_ret to __noreturn function '%s' is rejected.\n",
+ verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
tgt_info.tgt_name);
return -EINVAL;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 655efac6f133..3b0d9bd039de 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -685,6 +685,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
if (bpf_fentry_test1(1) != 2 ||
bpf_fentry_test2(2, 3) != 5 ||
bpf_fentry_test3(4, 5, 6) != 15 ||
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 850dd736ccd1..de111818f3a0 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -365,6 +365,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
return true;
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FSESSION:
return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage",
strlen("bpf_sk_storage"));
default:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6b92b0847ec2..012abaf3d4ac 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1145,6 +1145,7 @@ enum bpf_attach_type {
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
+ BPF_TRACE_FSESSION,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
index 10e231965589..f9f9e1cb87bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -73,7 +73,7 @@ static void test_tracing_deny(void)
static void test_fexit_noreturns(void)
{
test_tracing_fail_prog("fexit_noreturns",
- "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.");
+ "Attaching fexit/fsession/fmod_ret to __noreturn function 'do_exit' is rejected.");
}
void test_tracing_failure(void)
On 2026/1/1 21:52 Jiri Olsa <olsajiri@gmail.com> wrote:
> On Wed, Dec 24, 2025 at 09:07:26PM +0800, Menglong Dong wrote:
>
> SNIP
Hi, Jiri. Happy New Year!
>
> > +struct bpf_fsession_link {
> > + struct bpf_tracing_link link;
> > + struct bpf_tramp_link fexit;
> > +};
> > +
> > struct bpf_raw_tp_link {
> > struct bpf_link link;
> > struct bpf_raw_event_map *btp;
> > @@ -2114,6 +2120,20 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
> >
> > #endif
> >
> > +static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
> > +{
> > + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
> > + int cnt = 0;
> > +
> > + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
> > + if (fentries.links[i]->link.prog->expected_attach_type ==
> > + BPF_TRACE_FSESSION)
>
> let's keep it on the single line ?
OK
>
> > + cnt++;
> > + }
> > +
> > + return cnt;
> > +}
> > +
[......]
> > @@ -3628,7 +3629,21 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
> > key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
> > }
> >
> > - link = kzalloc(sizeof(*link), GFP_USER);
> > + if (prog->expected_attach_type == BPF_TRACE_FSESSION) {
> > + struct bpf_fsession_link *fslink;
> > +
> > + fslink = kzalloc(sizeof(*fslink), GFP_USER);
> > + if (fslink) {
> > + bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING,
> > + &bpf_tracing_link_lops, prog, attach_type);
>
> I don't think we need the extra exit struct bpf_link, we just need
> hlist_node hook for exit program, so this should perhaps be:
>
> struct bpf_fsession_link {
> struct bpf_tracing_link link;
> struct hlist_node tramp_hlist;
> };
I think we can't do it this way because of how we manage
the bpf_link in the trampoline, as you can see in
bpf_trampoline_get_progs() and the struct bpf_tramp_links.
In bpf_trampoline_get_progs(), it will look up all the bpf_link
in the trampoline. If we simply add the bpf_fsession_link->tramp_hlist,
the struct in the progs_hlist will be inconsistent.
>
>
> SNIP
>
> > @@ -596,6 +598,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
> > {
> > enum bpf_tramp_prog_type kind;
> > struct bpf_tramp_link *link_exiting;
> > + struct bpf_fsession_link *fslink;
> > + struct hlist_head *prog_list;
> > int err = 0;
> > int cnt = 0, i;
> >
> > @@ -621,24 +625,44 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
> > BPF_MOD_JUMP, NULL,
> > link->link.prog->bpf_func);
> > }
> > + if (kind == BPF_TRAMP_FSESSION) {
> > + prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY];
> > + cnt++;
> > + } else {
> > + prog_list = &tr->progs_hlist[kind];
> > + }
> > if (cnt >= BPF_MAX_TRAMP_LINKS)
> > return -E2BIG;
> > if (!hlist_unhashed(&link->tramp_hlist))
> > /* prog already linked */
> > return -EBUSY;
> > - hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
> > + hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) {
> > if (link_exiting->link.prog != link->link.prog)
> > continue;
> > /* prog already linked */
> > return -EBUSY;
> > }
> >
> > - hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
> > - tr->progs_cnt[kind]++;
> > + hlist_add_head(&link->tramp_hlist, prog_list);
> > + if (kind == BPF_TRAMP_FSESSION) {
> > + tr->progs_cnt[BPF_TRAMP_FENTRY]++;
> > + fslink = container_of(link, struct bpf_fsession_link, link.link);
> > + hlist_add_head(&fslink->fexit.tramp_hlist,
> > + &tr->progs_hlist[BPF_TRAMP_FEXIT]);
> > + tr->progs_cnt[BPF_TRAMP_FEXIT]++;
> > + } else {
> > + tr->progs_cnt[kind]++;
> > + }
> > err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
> > if (err) {
> > hlist_del_init(&link->tramp_hlist);
> > - tr->progs_cnt[kind]--;
> > + if (kind == BPF_TRAMP_FSESSION) {
> > + tr->progs_cnt[BPF_TRAMP_FENTRY]--;
> > + hlist_del_init(&fslink->fexit.tramp_hlist);
> > + tr->progs_cnt[BPF_TRAMP_FEXIT]--;
> > + } else {
> > + tr->progs_cnt[kind]--;
> > + }
> > }
> > return err;
>
> this seems confusing, how about we just add abolish bpf_fsession_link
It was more confusing in V1. I adopted Andrii's suggestion in
this version to make the logic here clearer. But it still
seems confusing :/
Maybe we need more documentation here to aid understanding.
> and add extra hlist_node hook to struct bpf_tramp_link .. we will waste
> 16 bytes for other cases, but the code seems less confusing to me
>
> untested, so I might overlooked something..
>
> jirka
>
>
>
> ---
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4e7d72dfbcd4..7479664844ea 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1309,6 +1309,7 @@ enum bpf_tramp_prog_type {
> BPF_TRAMP_MODIFY_RETURN,
> BPF_TRAMP_MAX,
> BPF_TRAMP_REPLACE, /* more than MAX */
> + BPF_TRAMP_FSESSION,
> };
>
> struct bpf_tramp_image {
> @@ -1861,6 +1862,7 @@ struct bpf_link_ops {
> struct bpf_tramp_link {
> struct bpf_link link;
> struct hlist_node tramp_hlist;
> + struct hlist_node extra_hlist;
> u64 cookie;
> };
In this way, it can indeed make the update of the hlist clearer. However,
I think that you missed the reading of the hlist that I mentioned above.
You can't add both the "tramp_hlist" and "extra_hlist" to the same hlist. If
so, how do we iterate the hlist? Am I missing something?
Thanks!
Menglong Dong
>
[......]
> void test_tracing_failure(void)
>
>
>
On Fri, Jan 02, 2026 at 05:21:42PM +0800, Menglong Dong wrote:
SNIP
> > ---
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 4e7d72dfbcd4..7479664844ea 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -1309,6 +1309,7 @@ enum bpf_tramp_prog_type {
> > BPF_TRAMP_MODIFY_RETURN,
> > BPF_TRAMP_MAX,
> > BPF_TRAMP_REPLACE, /* more than MAX */
> > + BPF_TRAMP_FSESSION,
> > };
> >
> > struct bpf_tramp_image {
> > @@ -1861,6 +1862,7 @@ struct bpf_link_ops {
> > struct bpf_tramp_link {
> > struct bpf_link link;
> > struct hlist_node tramp_hlist;
> > + struct hlist_node extra_hlist;
> > u64 cookie;
> > };
>
> In this way, it indeed can make the update of the hlist more clear. However,
> I think that you missed the reading of the hlist as I mentioned above.
> You can't add both the "tramp_hlist" and "extra_hlist" to the same hlist. If
> so, how do we iterate the hlist? Do I miss something?
ugh, it's on the same list.. nevermind then ;-)
jirka
© 2016 - 2026 Red Hat, Inc.