From: Geliang Tang <tanggeliang@kylinos.cn>
This patch implements a new struct bpf_struct_ops for MPTCP BPF path
manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager
in .reg and .unreg.
Add write access for some fields of struct mptcp_sock and struct
mptcp_pm_addr_entry in .btf_struct_access.
This MPTCP BPF path manager implementation is similar to BPF TCP CC. And
net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/bpf.c | 259 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 258 insertions(+), 1 deletion(-)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 2b0cfb57df8c..596574102b89 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -17,10 +17,266 @@
#include "protocol.h"
#ifdef CONFIG_BPF_JIT
-static struct bpf_struct_ops bpf_mptcp_sched_ops;
+static struct bpf_struct_ops bpf_mptcp_pm_ops,
+ bpf_mptcp_sched_ops;
static u32 mptcp_sock_id,
+ mptcp_entry_id,
mptcp_subflow_id;
+/* MPTCP BPF path manager */
+
+static const struct bpf_func_proto *
+bpf_mptcp_pm_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &bpf_sk_storage_delete_proto;
+ default:
+ return bpf_base_func_proto(func_id, prog);
+ }
+}
+
+static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ u32 id = reg->btf_id;
+ size_t end;
+
+ if (id == mptcp_sock_id) {
+ switch (off) {
+ case offsetof(struct mptcp_sock, pm.remote.id):
+ end = offsetofend(struct mptcp_sock, pm.remote.id);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.family):
+ end = offsetofend(struct mptcp_sock, pm.remote.family);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.port):
+ end = offsetofend(struct mptcp_sock, pm.remote.port);
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[0]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[0]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[1]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[1]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[2]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[2]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[3]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[3]);
+ break;
+#else
+ case offsetof(struct mptcp_sock, pm.remote.addr.s_addr):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr.s_addr);
+ break;
+#endif
+ case offsetof(struct mptcp_sock, pm.work_pending):
+ end = offsetofend(struct mptcp_sock, pm.work_pending);
+ break;
+ case offsetof(struct mptcp_sock, pm.accept_addr):
+ end = offsetofend(struct mptcp_sock, pm.accept_addr);
+ break;
+ case offsetof(struct mptcp_sock, pm.accept_subflow):
+ end = offsetofend(struct mptcp_sock, pm.accept_subflow);
+ break;
+ case offsetof(struct mptcp_sock, pm.add_addr_signaled):
+ end = offsetofend(struct mptcp_sock, pm.add_addr_signaled);
+ break;
+ case offsetof(struct mptcp_sock, pm.local_addr_used):
+ end = offsetofend(struct mptcp_sock, pm.local_addr_used);
+ break;
+ case offsetof(struct mptcp_sock, pm.subflows):
+ end = offsetofend(struct mptcp_sock, pm.subflows);
+ break;
+ default:
+ bpf_log(log, "no write support to mptcp_sock at off %d\n",
+ off);
+ return -EACCES;
+ }
+ } else if (id == mptcp_entry_id) {
+ switch (off) {
+ case offsetof(struct mptcp_pm_addr_entry, addr.id):
+ end = offsetofend(struct mptcp_pm_addr_entry, addr.id);
+ break;
+ case offsetof(struct mptcp_pm_addr_entry, addr.port):
+ end = offsetofend(struct mptcp_pm_addr_entry, addr.port);
+ break;
+ default:
+ bpf_log(log, "no write support to mptcp_pm_addr_entry at off %d\n",
+ off);
+ return -EACCES;
+ }
+ } else {
+ bpf_log(log, "only access to mptcp sock or addr or entry is supported\n");
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log, "access beyond %s at off %u size %u ended at %zu",
+ id == mptcp_sock_id ? "mptcp_sock" :
+ (id == mptcp_entry_id ? "mptcp_pm_addr_entry" : "mptcp_addr_info"),
+ off, size, end);
+ return -EACCES;
+ }
+
+ return NOT_INIT;
+}
+
+static const struct bpf_verifier_ops bpf_mptcp_pm_verifier_ops = {
+ .get_func_proto = bpf_mptcp_pm_get_func_proto,
+ .is_valid_access = bpf_tracing_btf_ctx_access,
+ .btf_struct_access = bpf_mptcp_pm_btf_struct_access,
+};
+
+static int bpf_mptcp_pm_reg(void *kdata, struct bpf_link *link)
+{
+ return mptcp_pm_register(kdata);
+}
+
+static void bpf_mptcp_pm_unreg(void *kdata, struct bpf_link *link)
+{
+ mptcp_pm_unregister(kdata);
+}
+
+static int bpf_mptcp_pm_check_member(const struct btf_type *t,
+ const struct btf_member *member,
+ const struct bpf_prog *prog)
+{
+ return 0;
+}
+
+static int bpf_mptcp_pm_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ const struct mptcp_pm_ops *upm;
+ struct mptcp_pm_ops *pm;
+ u32 moff;
+
+ upm = (const struct mptcp_pm_ops *)udata;
+ pm = (struct mptcp_pm_ops *)kdata;
+
+ moff = __btf_member_bit_offset(t, member) / 8;
+ switch (moff) {
+ case offsetof(struct mptcp_pm_ops, name):
+ if (bpf_obj_name_cpy(pm->name, upm->name,
+ sizeof(pm->name)) <= 0)
+ return -EINVAL;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bpf_mptcp_pm_init(struct btf *btf)
+{
+ s32 type_id;
+
+ type_id = btf_find_by_name_kind(btf, "mptcp_sock",
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ mptcp_sock_id = type_id;
+
+ type_id = btf_find_by_name_kind(btf, "mptcp_pm_addr_entry",
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ mptcp_entry_id = type_id;
+
+ return 0;
+}
+
+static int bpf_mptcp_pm_validate(void *kdata)
+{
+ return mptcp_pm_validate(kdata);
+}
+
+static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *skc)
+{
+ return 0;
+}
+
+static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ return false;
+}
+
+static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock *msk)
+{
+}
+
+static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
+{
+ return false;
+}
+
+static bool __bpf_mptcp_pm_accept_new_subflow(const struct mptcp_sock *msk)
+{
+ return false;
+}
+
+static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ return false;
+}
+
+static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ return 0;
+}
+
+static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
+{
+}
+
+static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
+ .get_local_id = __bpf_mptcp_pm_get_local_id,
+ .get_priority = __bpf_mptcp_pm_get_priority,
+ .established = __bpf_mptcp_pm_established,
+ .subflow_established = __bpf_mptcp_pm_subflow_established,
+ .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow,
+ .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow,
+ .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
+ .add_addr_received = __bpf_mptcp_pm_add_addr_received,
+ .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
+ .init = __bpf_mptcp_pm_init,
+ .release = __bpf_mptcp_pm_release,
+};
+
+static struct bpf_struct_ops bpf_mptcp_pm_ops = {
+ .verifier_ops = &bpf_mptcp_pm_verifier_ops,
+ .reg = bpf_mptcp_pm_reg,
+ .unreg = bpf_mptcp_pm_unreg,
+ .check_member = bpf_mptcp_pm_check_member,
+ .init_member = bpf_mptcp_pm_init_member,
+ .init = bpf_mptcp_pm_init,
+ .validate = bpf_mptcp_pm_validate,
+ .name = "mptcp_pm_ops",
+ .cfi_stubs = &__bpf_mptcp_pm_ops,
+};
+
/* MPTCP BPF packet scheduler */
static const struct bpf_func_proto *
@@ -332,6 +588,7 @@ static int __init bpf_mptcp_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
&bpf_mptcp_common_kfunc_set);
#ifdef CONFIG_BPF_JIT
+ ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_pm_ops, mptcp_pm_ops);
ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops);
#endif
--
2.43.0
Hi Geliang, On 21/03/2025 02:49, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch implements a new struct bpf_struct_ops for MPTCP BPF path > manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager > in .reg and .unreg. > > Add write access for some fields of struct mptcp_sock and struct > mptcp_pm_addr_entry in .btf_struct_access. > > This MPTCP BPF path manager implementation is similar to BPF TCP CC. And > net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch. > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > --- > net/mptcp/bpf.c | 259 +++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 258 insertions(+), 1 deletion(-) > > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c > index 2b0cfb57df8c..596574102b89 100644 > --- a/net/mptcp/bpf.c > +++ b/net/mptcp/bpf.c (...) > +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk, > + struct mptcp_pm_addr_entry *skc) > +{ > + return 0; > +} > + > +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk, > + struct mptcp_addr_info *skc) > +{ > + return false; > +} > + > +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk) > +{ > +} > + > +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock *msk) > +{ > +} > + > +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) > +{ > + return false; > +} > + > +static bool __bpf_mptcp_pm_accept_new_subflow(const struct mptcp_sock *msk) > +{ > + return false; > +} > + > +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk, > + const struct mptcp_addr_info *addr) > +{ > + return false; > +} > + > +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock *msk, > + const struct mptcp_addr_info *addr) > +{ > + return 0; > +} > + > +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock *msk) > +{ > +} > + > +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk) > +{ > +} > + > +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk) > +{ > +} > + > +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = { > + .get_local_id = __bpf_mptcp_pm_get_local_id, > + .get_priority = __bpf_mptcp_pm_get_priority, > + .established = __bpf_mptcp_pm_established, > + .subflow_established = __bpf_mptcp_pm_subflow_established, > + .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow, > + .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow, > + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo, > + .add_addr_received = __bpf_mptcp_pm_add_addr_received, > + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received, Out of curiosity: I see here that even the optional hooks are assigned: does it mean that all function pointers will never be NULL and checks like 'pm->ops->add_addr_received' will always be true with a BPF PM? Or is it still OK to assign them to NULL for a new BPF PM? Cheers, Matt -- Sponsored by the NGI0 Core fund.
On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote: > Hi Geliang, > > On 21/03/2025 02:49, Geliang Tang wrote: > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > This patch implements a new struct bpf_struct_ops for MPTCP BPF > > path > > manager: bpf_mptcp_pm_ops. Register and unregister the bpf path > > manager > > in .reg and .unreg. > > > > Add write access for some fields of struct mptcp_sock and struct > > mptcp_pm_addr_entry in .btf_struct_access. > > > > This MPTCP BPF path manager implementation is similar to BPF TCP > > CC. And > > net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch. > > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > > --- > > net/mptcp/bpf.c | 259 > > +++++++++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 258 insertions(+), 1 deletion(-) > > > > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c > > index 2b0cfb57df8c..596574102b89 100644 > > --- a/net/mptcp/bpf.c > > +++ b/net/mptcp/bpf.c > > (...) > > > +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk, > > + struct mptcp_pm_addr_entry > > *skc) > > +{ > > + return 0; > > +} > > + > > +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk, > > + struct mptcp_addr_info > > *skc) > > +{ > > + return false; > > +} > > + > > +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk) > > +{ > > +} > > + > > +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock > > *msk) > > +{ > > +} > > + > > +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock > > *msk) > > +{ > > + return false; > > +} > > + > > +static bool __bpf_mptcp_pm_accept_new_subflow(const struct > > mptcp_sock *msk) > > +{ > > + return false; > > +} > > + > > +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk, > > + const struct > > mptcp_addr_info *addr) > > +{ > > + return false; > > +} > > + > > +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock > > *msk, > > + const struct > > mptcp_addr_info *addr) > > +{ > > + return 0; > > +} > > + > > +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock > > *msk) > > +{ > > +} > > + > > +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk) > > +{ > > +} > > + > > +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk) > > +{ > > +} > > + > > +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = { > > + .get_local_id = __bpf_mptcp_pm_get_local_id, > > + .get_priority = __bpf_mptcp_pm_get_priority, > > + .established = __bpf_mptcp_pm_established, > > + .subflow_established = > > __bpf_mptcp_pm_subflow_established, > > + .allow_new_subflow = > > __bpf_mptcp_pm_allow_new_subflow, > > + .accept_new_subflow = > > __bpf_mptcp_pm_accept_new_subflow, > > + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo, > > + .add_addr_received = > > __bpf_mptcp_pm_add_addr_received, > > + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received, > > Out of curiosity: I see here that even the optional hooks are > assigned: Optional hooks must be assigned here, otherwise this hook cannot be defined in BPF. > does it mean that all function pointers will never be NULL and checks > like 'pm->ops->add_addr_received' will always be true with a BPF PM? > Or > is it still OK to assign them to NULL for a new BPF PM? I think it's the latter, it's OK to assign them to NULL. Thanks, -Geliang > > Cheers, > Matt
Hi Geliang, On 24/03/2025 11:43, Geliang Tang wrote: > On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote: >> Hi Geliang, >> >> On 21/03/2025 02:49, Geliang Tang wrote: >>> From: Geliang Tang <tanggeliang@kylinos.cn> >>> >>> This patch implements a new struct bpf_struct_ops for MPTCP BPF >>> path >>> manager: bpf_mptcp_pm_ops. Register and unregister the bpf path >>> manager >>> in .reg and .unreg. >>> >>> Add write access for some fields of struct mptcp_sock and struct >>> mptcp_pm_addr_entry in .btf_struct_access. >>> >>> This MPTCP BPF path manager implementation is similar to BPF TCP >>> CC. And >>> net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch. >>> >>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> >>> --- >>> net/mptcp/bpf.c | 259 >>> +++++++++++++++++++++++++++++++++++++++++++++++- >>> 1 file changed, 258 insertions(+), 1 deletion(-) >>> >>> diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c >>> index 2b0cfb57df8c..596574102b89 100644 >>> --- a/net/mptcp/bpf.c >>> +++ b/net/mptcp/bpf.c >> >> (...) >> >>> +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk, >>> + struct mptcp_pm_addr_entry >>> *skc) >>> +{ >>> + return 0; >>> +} >>> + >>> +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk, >>> + struct mptcp_addr_info >>> *skc) >>> +{ >>> + return false; >>> +} >>> + >>> +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk) >>> +{ >>> +} >>> + >>> +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock >>> *msk) >>> +{ >>> +} >>> + >>> +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock >>> *msk) >>> +{ >>> + return false; >>> +} >>> + >>> +static bool __bpf_mptcp_pm_accept_new_subflow(const struct >>> mptcp_sock *msk) >>> +{ >>> + return false; >>> +} >>> + >>> +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk, >>> + const struct >>> mptcp_addr_info *addr) >>> +{ >>> + return false; >>> +} >>> + >>> +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock >>> *msk, >>> + const struct >>> mptcp_addr_info *addr) >>> +{ >>> + return 0; >>> +} >>> + >>> +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock >>> *msk) >>> +{ >>> +} >>> + >>> +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk) >>> +{ >>> +} >>> + >>> +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk) >>> +{ >>> +} >>> + >>> +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = { >>> + .get_local_id = __bpf_mptcp_pm_get_local_id, >>> + .get_priority = __bpf_mptcp_pm_get_priority, >>> + .established = __bpf_mptcp_pm_established, >>> + .subflow_established = >>> __bpf_mptcp_pm_subflow_established, >>> + .allow_new_subflow = >>> __bpf_mptcp_pm_allow_new_subflow, >>> + .accept_new_subflow = >>> __bpf_mptcp_pm_accept_new_subflow, >>> + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo, >>> + .add_addr_received = >>> __bpf_mptcp_pm_add_addr_received, >>> + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received, >> >> Out of curiosity: I see here that even the optional hooks are >> assigned: > > Optional hooks must be assigned here, otherwise this hook cannot be > defined in BPF. OK, thanks! >> does it mean that all function pointers will never be NULL and checks >> like 'pm->ops->add_addr_received' will always be true with a BPF PM? >> Or >> is it still OK to assign them to NULL for a new BPF PM? > > I think it's the latter, it's OK to assign them to NULL. If you have the infrastructure ready, can you check if you can set add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for example please? Also, just to be sure, can you also check that in this case, in the pm.c, pm->ops->add_addr_received is also set to NULL and not to __bpf_mptcp_pm_add_addr_received? (not urgent) Cheers, Matt -- Sponsored by the NGI0 Core fund.
On Mon, 2025-03-24 at 12:06 +0100, Matthieu Baerts wrote: > Hi Geliang, > > On 24/03/2025 11:43, Geliang Tang wrote: > > On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote: > > > Hi Geliang, > > > > > > On 21/03/2025 02:49, Geliang Tang wrote: > > > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > > > > > This patch implements a new struct bpf_struct_ops for MPTCP BPF > > > > path > > > > manager: bpf_mptcp_pm_ops. Register and unregister the bpf path > > > > manager > > > > in .reg and .unreg. > > > > > > > > Add write access for some fields of struct mptcp_sock and > > > > struct > > > > mptcp_pm_addr_entry in .btf_struct_access. > > > > > > > > This MPTCP BPF path manager implementation is similar to BPF > > > > TCP > > > > CC. And > > > > net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch. > > > > > > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > > > > --- > > > > net/mptcp/bpf.c | 259 > > > > +++++++++++++++++++++++++++++++++++++++++++++++- > > > > 1 file changed, 258 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c > > > > index 2b0cfb57df8c..596574102b89 100644 > > > > --- a/net/mptcp/bpf.c > > > > +++ b/net/mptcp/bpf.c > > > > > > (...) > > > > > > > +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk, > > > > + struct > > > > mptcp_pm_addr_entry > > > > *skc) > > > > +{ > > > > + return 0; > > > > +} > > > > + > > > > +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock > > > > *msk, > > > > + struct mptcp_addr_info > > > > *skc) > > > > +{ > > > > + return false; > > > > +} > > > > + > > > > +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk) > > > > +{ > > > > +} > > > > + > > > > +static void __bpf_mptcp_pm_subflow_established(struct > > > > mptcp_sock > > > > *msk) > > > > +{ > > > > +} > > > > + > > > > +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock > > > > *msk) > > > > +{ > > > > + return false; > > > > +} > > > > + > > > > +static bool __bpf_mptcp_pm_accept_new_subflow(const struct > > > > mptcp_sock *msk) > > > > +{ > > > > + return false; > > > > +} > > > > + > > > > +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock > > > > *msk, > > > > + const struct > > > > mptcp_addr_info *addr) > > > > +{ > > > > + return false; > > > > +} > > > > + > > > > +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock > > > > *msk, > > > > + const struct > > > > mptcp_addr_info *addr) > > > > +{ > > > > + return 0; > > > > +} > > > > + > > > > +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock > > > > *msk) > > > > +{ > > > > +} > > > > + > > > > +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk) > > > > +{ > > > > +} > > > > + > > > > +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk) > > > > +{ > > > > +} > > > > + > > > > +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = { > > > > + .get_local_id = __bpf_mptcp_pm_get_local_id, > > > > + .get_priority = __bpf_mptcp_pm_get_priority, > > > > + .established = __bpf_mptcp_pm_established, > > > > + .subflow_established = > > > > __bpf_mptcp_pm_subflow_established, > > > > + .allow_new_subflow = > > > > __bpf_mptcp_pm_allow_new_subflow, > > > > + .accept_new_subflow = > > > > __bpf_mptcp_pm_accept_new_subflow, > > > > + .add_addr_echo = > > > > __bpf_mptcp_pm_add_addr_echo, > > > > + .add_addr_received = > > > > __bpf_mptcp_pm_add_addr_received, > > > > + .rm_addr_received = > > > > __bpf_mptcp_pm_rm_addr_received, > > > > > > Out of curiosity: I see here that even the optional hooks are > > > assigned: > > > > Optional hooks must be assigned here, otherwise this hook cannot be > > defined in BPF. > > OK, thanks! > > > > does it mean that all function pointers will never be NULL and > > > checks > > > like 'pm->ops->add_addr_received' will always be true with a BPF > > > PM? > > > Or > > > is it still OK to assign them to NULL for a new BPF PM? > > > > I think it's the latter, it's OK to assign them to NULL. > > If you have the infrastructure ready, can you check if you can set > add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for > example please? Also, just to be sure, can you also check that in > this > case, in the pm.c, pm->ops->add_addr_received is also set to NULL and > not to __bpf_mptcp_pm_add_addr_received? (not urgent) Sure, here's the test: diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index f9fed096d77c..6bdca0dcf21e 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -578,6 +578,9 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id, READ_ONCE(pm->accept_addr)); + pr_info("%s name=%s, pm->ops->add_addr_received=%p\n", + __func__, pm->ops->name, pm->ops->add_addr_received); + mptcp_event_addr_announced(ssk, addr); spin_lock_bh(&pm->lock); diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c index 2f8e0e85b5d7..8aa4b8c9ce33 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c @@ -265,4 +265,5 @@ struct mptcp_pm_ops bpf_userspace = { .init = (void *)mptcp_pm_userspace_init, .release = (void *)mptcp_pm_userspace_release, .name = "bpf_userspace", + .add_addr_received = (void *)NULL, }; And the output: [ 18.229067][ C0] MPTCP: mptcp_pm_add_addr_received name=kernel, pm->ops->add_addr_received=00000000cd865d66 [ 18.231316][ C0] MPTCP: mptcp_pm_add_addr_received name=kernel, pm->ops->add_addr_received=00000000cd865d66 [ 21.105658][ C0] MPTCP: mptcp_pm_add_addr_received name=bpf_netlink, pm->ops->add_addr_received=00000000fe7b7426 [ 21.106419][ C0] MPTCP: mptcp_pm_add_addr_received name=bpf_netlink, pm->ops->add_addr_received=00000000fe7b7426 [ 24.767318][ C0] MPTCP: mptcp_pm_add_addr_received name=userspace, pm->ops->add_addr_received=0000000000000000 [ 28.220824][ C0] MPTCP: mptcp_pm_add_addr_received name=bpf_userspace, pm->ops->add_addr_received=0000000000000000 [ 36.623859][ C0] MPTCP: mptcp_pm_add_addr_received name=bpf_hashmap, pm->ops->add_addr_received=0000000000000000 # #187/1 mptcp/connect:OK # #187/2 mptcp/base:OK # #187/3 mptcp/mptcpify:OK # #187/4 mptcp/subflow:OK # #187/5 mptcp/iters_subflow:OK # #187/6 mptcp/netlink_pm:OK # #187/7 mptcp/bpf_netlink_pm:OK # #187/8 mptcp/userspace_pm:OK # #187/9 mptcp/bpf_userspace_pm:OK # #187/10 mptcp/iters_netlink_address:OK # #187/11 mptcp/iters_userspace_address:OK # #187/12 mptcp/bpf_hashmap_pm:OK # #187/13 mptcp/sockopt:OK # #187/14 mptcp/default:OK # #187/15 mptcp/first:OK # #187/16 mptcp/bkup:OK # #187/17 mptcp/rr:OK # #187/18 mptcp/red:OK # #187/19 mptcp/burst:OK # #187/20 mptcp/stale:OK # #187 mptcp:OK pm->ops->add_addr_received is set to NULL indeed, whether we use ".add_addr_received = (void *)NULL," so that it is explicitly set to NULL, or simply do not assign a new function to it but assign other function pointers. Thanks, -Geliang > > Cheers, > Matt
Hi Geliang, On 25/03/2025 05:15, Geliang Tang wrote: > On Mon, 2025-03-24 at 12:06 +0100, Matthieu Baerts wrote: >> On 24/03/2025 11:43, Geliang Tang wrote: >>> On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote: (...) >>>> does it mean that all function pointers will never be NULL and >>>> checks >>>> like 'pm->ops->add_addr_received' will always be true with a BPF >>>> PM? >>>> Or >>>> is it still OK to assign them to NULL for a new BPF PM? >>> >>> I think it's the latter, it's OK to assign them to NULL. >> >> If you have the infrastructure ready, can you check if you can set >> add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for >> example please? Also, just to be sure, can you also check that in >> this >> case, in the pm.c, pm->ops->add_addr_received is also set to NULL and >> not to __bpf_mptcp_pm_add_addr_received? (not urgent) > > Sure, here's the test: (...) > pm->ops->add_addr_received is set to NULL indeed, whether we use > ".add_addr_received = (void *)NULL," so that it is explicitly set to > NULL, or simply do not assign a new function to it but assign other > function pointers. Good, thank you for having checked! So we can avoid worker operations (PM), and keeping the MPTCP retransmission callback optional (sched). Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Geliang, On 21/03/2025 02:49, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch implements a new struct bpf_struct_ops for MPTCP BPF path > manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager > in .reg and .unreg. > > Add write access for some fields of struct mptcp_sock and struct > mptcp_pm_addr_entry in .btf_struct_access. > > This MPTCP BPF path manager implementation is similar to BPF TCP CC. And > net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch. (...) > +static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log, > + const struct bpf_reg_state *reg, > + int off, int size) I don't know how it works exactly, but with BPF, can we not force a program to automatically take a lock (pm->lock) when trying to modify any of the fields below? Also, is there really a need for a BPF PM to modify any of these fields directly? Are most of them handled either by pm.c before calling a callback or are specific to the in-kernel PM? (...) > +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = { > + .get_local_id = __bpf_mptcp_pm_get_local_id, > + .get_priority = __bpf_mptcp_pm_get_priority, > + .established = __bpf_mptcp_pm_established, > + .subflow_established = __bpf_mptcp_pm_subflow_established, > + .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow, > + .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow, There is a mix of spaces and tabs here above. Only use tabs? > + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo, > + .add_addr_received = __bpf_mptcp_pm_add_addr_received, > + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received, > + .init = __bpf_mptcp_pm_init, > + .release = __bpf_mptcp_pm_release, > +}; (...) Cheers, Matt -- Sponsored by the NGI0 Core fund.
© 2016 - 2025 Red Hat, Inc.