From: Geliang Tang <tanggeliang@kylinos.cn>
This patch implements a new struct bpf_struct_ops for MPTCP BPF path
manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager
in .reg and .unreg.
Add write access for some fields of struct mptcp_sock and struct
mptcp_pm_addr_entry in .btf_struct_access.
This MPTCP BPF path manager implementation is similar to BPF TCP CC. And
net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/bpf.c | 259 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 258 insertions(+), 1 deletion(-)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 2b0cfb57df8c..596574102b89 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -17,10 +17,266 @@
#include "protocol.h"
#ifdef CONFIG_BPF_JIT
-static struct bpf_struct_ops bpf_mptcp_sched_ops;
+static struct bpf_struct_ops bpf_mptcp_pm_ops,
+ bpf_mptcp_sched_ops;
static u32 mptcp_sock_id,
+ mptcp_entry_id,
mptcp_subflow_id;
+/* MPTCP BPF path manager */
+
+static const struct bpf_func_proto *
+bpf_mptcp_pm_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_proto;
+ case BPF_FUNC_sk_storage_delete:
+ return &bpf_sk_storage_delete_proto;
+ default:
+ return bpf_base_func_proto(func_id, prog);
+ }
+}
+
+static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ u32 id = reg->btf_id;
+ size_t end;
+
+ if (id == mptcp_sock_id) {
+ switch (off) {
+ case offsetof(struct mptcp_sock, pm.remote.id):
+ end = offsetofend(struct mptcp_sock, pm.remote.id);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.family):
+ end = offsetofend(struct mptcp_sock, pm.remote.family);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.port):
+ end = offsetofend(struct mptcp_sock, pm.remote.port);
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[0]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[0]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[1]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[1]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[2]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[2]);
+ break;
+ case offsetof(struct mptcp_sock, pm.remote.addr6.s6_addr32[3]):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr6.s6_addr32[3]);
+ break;
+#else
+ case offsetof(struct mptcp_sock, pm.remote.addr.s_addr):
+ end = offsetofend(struct mptcp_sock, pm.remote.addr.s_addr);
+ break;
+#endif
+ case offsetof(struct mptcp_sock, pm.work_pending):
+ end = offsetofend(struct mptcp_sock, pm.work_pending);
+ break;
+ case offsetof(struct mptcp_sock, pm.accept_addr):
+ end = offsetofend(struct mptcp_sock, pm.accept_addr);
+ break;
+ case offsetof(struct mptcp_sock, pm.accept_subflow):
+ end = offsetofend(struct mptcp_sock, pm.accept_subflow);
+ break;
+ case offsetof(struct mptcp_sock, pm.add_addr_signaled):
+ end = offsetofend(struct mptcp_sock, pm.add_addr_signaled);
+ break;
+ case offsetof(struct mptcp_sock, pm.local_addr_used):
+ end = offsetofend(struct mptcp_sock, pm.local_addr_used);
+ break;
+ case offsetof(struct mptcp_sock, pm.subflows):
+ end = offsetofend(struct mptcp_sock, pm.subflows);
+ break;
+ default:
+ bpf_log(log, "no write support to mptcp_sock at off %d\n",
+ off);
+ return -EACCES;
+ }
+ } else if (id == mptcp_entry_id) {
+ switch (off) {
+ case offsetof(struct mptcp_pm_addr_entry, addr.id):
+ end = offsetofend(struct mptcp_pm_addr_entry, addr.id);
+ break;
+ case offsetof(struct mptcp_pm_addr_entry, addr.port):
+ end = offsetofend(struct mptcp_pm_addr_entry, addr.port);
+ break;
+ default:
+ bpf_log(log, "no write support to mptcp_pm_addr_entry at off %d\n",
+ off);
+ return -EACCES;
+ }
+ } else {
+ bpf_log(log, "only access to mptcp sock or addr or entry is supported\n");
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log, "access beyond %s at off %u size %u ended at %zu",
+ id == mptcp_sock_id ? "mptcp_sock" :
+ (id == mptcp_entry_id ? "mptcp_pm_addr_entry" : "mptcp_addr_info"),
+ off, size, end);
+ return -EACCES;
+ }
+
+ return NOT_INIT;
+}
+
+static const struct bpf_verifier_ops bpf_mptcp_pm_verifier_ops = {
+ .get_func_proto = bpf_mptcp_pm_get_func_proto,
+ .is_valid_access = bpf_tracing_btf_ctx_access,
+ .btf_struct_access = bpf_mptcp_pm_btf_struct_access,
+};
+
+static int bpf_mptcp_pm_reg(void *kdata, struct bpf_link *link)
+{
+ return mptcp_pm_register(kdata);
+}
+
+static void bpf_mptcp_pm_unreg(void *kdata, struct bpf_link *link)
+{
+ mptcp_pm_unregister(kdata);
+}
+
+static int bpf_mptcp_pm_check_member(const struct btf_type *t,
+ const struct btf_member *member,
+ const struct bpf_prog *prog)
+{
+ return 0;
+}
+
+static int bpf_mptcp_pm_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ const struct mptcp_pm_ops *upm;
+ struct mptcp_pm_ops *pm;
+ u32 moff;
+
+ upm = (const struct mptcp_pm_ops *)udata;
+ pm = (struct mptcp_pm_ops *)kdata;
+
+ moff = __btf_member_bit_offset(t, member) / 8;
+ switch (moff) {
+ case offsetof(struct mptcp_pm_ops, name):
+ if (bpf_obj_name_cpy(pm->name, upm->name,
+ sizeof(pm->name)) <= 0)
+ return -EINVAL;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bpf_mptcp_pm_init(struct btf *btf)
+{
+ s32 type_id;
+
+ type_id = btf_find_by_name_kind(btf, "mptcp_sock",
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ mptcp_sock_id = type_id;
+
+ type_id = btf_find_by_name_kind(btf, "mptcp_pm_addr_entry",
+ BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ mptcp_entry_id = type_id;
+
+ return 0;
+}
+
+static int bpf_mptcp_pm_validate(void *kdata)
+{
+ return mptcp_pm_validate(kdata);
+}
+
+static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *skc)
+{
+ return 0;
+}
+
+static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ return false;
+}
+
+static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock *msk)
+{
+}
+
+static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
+{
+ return false;
+}
+
+static bool __bpf_mptcp_pm_accept_new_subflow(const struct mptcp_sock *msk)
+{
+ return false;
+}
+
+static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ return false;
+}
+
+static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ return 0;
+}
+
+static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
+{
+}
+
+static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
+{
+}
+
+static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
+ .get_local_id = __bpf_mptcp_pm_get_local_id,
+ .get_priority = __bpf_mptcp_pm_get_priority,
+ .established = __bpf_mptcp_pm_established,
+ .subflow_established = __bpf_mptcp_pm_subflow_established,
+ .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow,
+ .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow,
+ .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
+ .add_addr_received = __bpf_mptcp_pm_add_addr_received,
+ .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
+ .init = __bpf_mptcp_pm_init,
+ .release = __bpf_mptcp_pm_release,
+};
+
+static struct bpf_struct_ops bpf_mptcp_pm_ops = {
+ .verifier_ops = &bpf_mptcp_pm_verifier_ops,
+ .reg = bpf_mptcp_pm_reg,
+ .unreg = bpf_mptcp_pm_unreg,
+ .check_member = bpf_mptcp_pm_check_member,
+ .init_member = bpf_mptcp_pm_init_member,
+ .init = bpf_mptcp_pm_init,
+ .validate = bpf_mptcp_pm_validate,
+ .name = "mptcp_pm_ops",
+ .cfi_stubs = &__bpf_mptcp_pm_ops,
+};
+
/* MPTCP BPF packet scheduler */
static const struct bpf_func_proto *
@@ -332,6 +588,7 @@ static int __init bpf_mptcp_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
&bpf_mptcp_common_kfunc_set);
#ifdef CONFIG_BPF_JIT
+ ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_pm_ops, mptcp_pm_ops);
ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops);
#endif
--
2.43.0
Hi Geliang,
On 21/03/2025 02:49, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> This patch implements a new struct bpf_struct_ops for MPTCP BPF path
> manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager
> in .reg and .unreg.
>
> Add write access for some fields of struct mptcp_sock and struct
> mptcp_pm_addr_entry in .btf_struct_access.
>
> This MPTCP BPF path manager implementation is similar to BPF TCP CC. And
> net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
> net/mptcp/bpf.c | 259 +++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 258 insertions(+), 1 deletion(-)
>
> diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> index 2b0cfb57df8c..596574102b89 100644
> --- a/net/mptcp/bpf.c
> +++ b/net/mptcp/bpf.c
(...)
> +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
> + struct mptcp_pm_addr_entry *skc)
> +{
> + return 0;
> +}
> +
> +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
> + struct mptcp_addr_info *skc)
> +{
> + return false;
> +}
> +
> +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
> +{
> +}
> +
> +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock *msk)
> +{
> +}
> +
> +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
> +{
> + return false;
> +}
> +
> +static bool __bpf_mptcp_pm_accept_new_subflow(const struct mptcp_sock *msk)
> +{
> + return false;
> +}
> +
> +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
> + const struct mptcp_addr_info *addr)
> +{
> + return false;
> +}
> +
> +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock *msk,
> + const struct mptcp_addr_info *addr)
> +{
> + return 0;
> +}
> +
> +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock *msk)
> +{
> +}
> +
> +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
> +{
> +}
> +
> +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
> +{
> +}
> +
> +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
> + .get_local_id = __bpf_mptcp_pm_get_local_id,
> + .get_priority = __bpf_mptcp_pm_get_priority,
> + .established = __bpf_mptcp_pm_established,
> + .subflow_established = __bpf_mptcp_pm_subflow_established,
> + .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow,
> + .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow,
> + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
> + .add_addr_received = __bpf_mptcp_pm_add_addr_received,
> + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
Out of curiosity: I see here that even the optional hooks are assigned:
does it mean that all function pointers will never be NULL and checks
like 'pm->ops->add_addr_received' will always be true with a BPF PM? Or
is it still OK to assign them to NULL for a new BPF PM?
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote:
> Hi Geliang,
>
> On 21/03/2025 02:49, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> >
> > This patch implements a new struct bpf_struct_ops for MPTCP BPF
> > path
> > manager: bpf_mptcp_pm_ops. Register and unregister the bpf path
> > manager
> > in .reg and .unreg.
> >
> > Add write access for some fields of struct mptcp_sock and struct
> > mptcp_pm_addr_entry in .btf_struct_access.
> >
> > This MPTCP BPF path manager implementation is similar to BPF TCP
> > CC. And
> > net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
> >
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> > net/mptcp/bpf.c | 259
> > +++++++++++++++++++++++++++++++++++++++++++++++-
> > 1 file changed, 258 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> > index 2b0cfb57df8c..596574102b89 100644
> > --- a/net/mptcp/bpf.c
> > +++ b/net/mptcp/bpf.c
>
> (...)
>
> > +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
> > + struct mptcp_pm_addr_entry
> > *skc)
> > +{
> > + return 0;
> > +}
> > +
> > +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
> > + struct mptcp_addr_info
> > *skc)
> > +{
> > + return false;
> > +}
> > +
> > +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
> > +{
> > +}
> > +
> > +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock
> > *msk)
> > +{
> > +}
> > +
> > +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock
> > *msk)
> > +{
> > + return false;
> > +}
> > +
> > +static bool __bpf_mptcp_pm_accept_new_subflow(const struct
> > mptcp_sock *msk)
> > +{
> > + return false;
> > +}
> > +
> > +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
> > + const struct
> > mptcp_addr_info *addr)
> > +{
> > + return false;
> > +}
> > +
> > +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock
> > *msk,
> > + const struct
> > mptcp_addr_info *addr)
> > +{
> > + return 0;
> > +}
> > +
> > +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock
> > *msk)
> > +{
> > +}
> > +
> > +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
> > +{
> > +}
> > +
> > +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
> > +{
> > +}
> > +
> > +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
> > + .get_local_id = __bpf_mptcp_pm_get_local_id,
> > + .get_priority = __bpf_mptcp_pm_get_priority,
> > + .established = __bpf_mptcp_pm_established,
> > + .subflow_established =
> > __bpf_mptcp_pm_subflow_established,
> > + .allow_new_subflow =
> > __bpf_mptcp_pm_allow_new_subflow,
> > + .accept_new_subflow =
> > __bpf_mptcp_pm_accept_new_subflow,
> > + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
> > + .add_addr_received =
> > __bpf_mptcp_pm_add_addr_received,
> > + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
>
> Out of curiosity: I see here that even the optional hooks are
> assigned:
Optional hooks must be assigned here, otherwise this hook cannot be
defined in BPF.
> does it mean that all function pointers will never be NULL and checks
> like 'pm->ops->add_addr_received' will always be true with a BPF PM?
> Or
> is it still OK to assign them to NULL for a new BPF PM?
I think it's the latter, it's OK to assign them to NULL.
Thanks,
-Geliang
>
> Cheers,
> Matt
Hi Geliang,
On 24/03/2025 11:43, Geliang Tang wrote:
> On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote:
>> Hi Geliang,
>>
>> On 21/03/2025 02:49, Geliang Tang wrote:
>>> From: Geliang Tang <tanggeliang@kylinos.cn>
>>>
>>> This patch implements a new struct bpf_struct_ops for MPTCP BPF
>>> path
>>> manager: bpf_mptcp_pm_ops. Register and unregister the bpf path
>>> manager
>>> in .reg and .unreg.
>>>
>>> Add write access for some fields of struct mptcp_sock and struct
>>> mptcp_pm_addr_entry in .btf_struct_access.
>>>
>>> This MPTCP BPF path manager implementation is similar to BPF TCP
>>> CC. And
>>> net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
>>>
>>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
>>> ---
>>> net/mptcp/bpf.c | 259
>>> +++++++++++++++++++++++++++++++++++++++++++++++-
>>> 1 file changed, 258 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
>>> index 2b0cfb57df8c..596574102b89 100644
>>> --- a/net/mptcp/bpf.c
>>> +++ b/net/mptcp/bpf.c
>>
>> (...)
>>
>>> +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
>>> + struct mptcp_pm_addr_entry
>>> *skc)
>>> +{
>>> + return 0;
>>> +}
>>> +
>>> +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
>>> + struct mptcp_addr_info
>>> *skc)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
>>> +{
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_subflow_established(struct mptcp_sock
>>> *msk)
>>> +{
>>> +}
>>> +
>>> +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock
>>> *msk)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> +static bool __bpf_mptcp_pm_accept_new_subflow(const struct
>>> mptcp_sock *msk)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock *msk,
>>> + const struct
>>> mptcp_addr_info *addr)
>>> +{
>>> + return false;
>>> +}
>>> +
>>> +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock
>>> *msk,
>>> + const struct
>>> mptcp_addr_info *addr)
>>> +{
>>> + return 0;
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock
>>> *msk)
>>> +{
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
>>> +{
>>> +}
>>> +
>>> +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
>>> +{
>>> +}
>>> +
>>> +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
>>> + .get_local_id = __bpf_mptcp_pm_get_local_id,
>>> + .get_priority = __bpf_mptcp_pm_get_priority,
>>> + .established = __bpf_mptcp_pm_established,
>>> + .subflow_established =
>>> __bpf_mptcp_pm_subflow_established,
>>> + .allow_new_subflow =
>>> __bpf_mptcp_pm_allow_new_subflow,
>>> + .accept_new_subflow =
>>> __bpf_mptcp_pm_accept_new_subflow,
>>> + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
>>> + .add_addr_received =
>>> __bpf_mptcp_pm_add_addr_received,
>>> + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
>>
>> Out of curiosity: I see here that even the optional hooks are
>> assigned:
>
> Optional hooks must be assigned here, otherwise this hook cannot be
> defined in BPF.
OK, thanks!
>> does it mean that all function pointers will never be NULL and checks
>> like 'pm->ops->add_addr_received' will always be true with a BPF PM?
>> Or
>> is it still OK to assign them to NULL for a new BPF PM?
>
> I think it's the latter, it's OK to assign them to NULL.
If you have the infrastructure ready, can you check if you can set
add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for
example please? Also, just to be sure, can you also check that in this
case, in the pm.c, pm->ops->add_addr_received is also set to NULL and
not to __bpf_mptcp_pm_add_addr_received? (not urgent)
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
On Mon, 2025-03-24 at 12:06 +0100, Matthieu Baerts wrote:
> Hi Geliang,
>
> On 24/03/2025 11:43, Geliang Tang wrote:
> > On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote:
> > > Hi Geliang,
> > >
> > > On 21/03/2025 02:49, Geliang Tang wrote:
> > > > From: Geliang Tang <tanggeliang@kylinos.cn>
> > > >
> > > > This patch implements a new struct bpf_struct_ops for MPTCP BPF
> > > > path
> > > > manager: bpf_mptcp_pm_ops. Register and unregister the bpf path
> > > > manager
> > > > in .reg and .unreg.
> > > >
> > > > Add write access for some fields of struct mptcp_sock and
> > > > struct
> > > > mptcp_pm_addr_entry in .btf_struct_access.
> > > >
> > > > This MPTCP BPF path manager implementation is similar to BPF
> > > > TCP
> > > > CC. And
> > > > net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
> > > >
> > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > > > ---
> > > > net/mptcp/bpf.c | 259
> > > > +++++++++++++++++++++++++++++++++++++++++++++++-
> > > > 1 file changed, 258 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
> > > > index 2b0cfb57df8c..596574102b89 100644
> > > > --- a/net/mptcp/bpf.c
> > > > +++ b/net/mptcp/bpf.c
> > >
> > > (...)
> > >
> > > > +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
> > > > + struct
> > > > mptcp_pm_addr_entry
> > > > *skc)
> > > > +{
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock
> > > > *msk,
> > > > + struct mptcp_addr_info
> > > > *skc)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_established(struct mptcp_sock *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_subflow_established(struct
> > > > mptcp_sock
> > > > *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static bool __bpf_mptcp_pm_allow_new_subflow(struct mptcp_sock
> > > > *msk)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +static bool __bpf_mptcp_pm_accept_new_subflow(const struct
> > > > mptcp_sock *msk)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +static bool __bpf_mptcp_pm_add_addr_echo(struct mptcp_sock
> > > > *msk,
> > > > + const struct
> > > > mptcp_addr_info *addr)
> > > > +{
> > > > + return false;
> > > > +}
> > > > +
> > > > +static int __bpf_mptcp_pm_add_addr_received(struct mptcp_sock
> > > > *msk,
> > > > + const struct
> > > > mptcp_addr_info *addr)
> > > > +{
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_rm_addr_received(struct mptcp_sock
> > > > *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
> > > > +{
> > > > +}
> > > > +
> > > > +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
> > > > + .get_local_id = __bpf_mptcp_pm_get_local_id,
> > > > + .get_priority = __bpf_mptcp_pm_get_priority,
> > > > + .established = __bpf_mptcp_pm_established,
> > > > + .subflow_established =
> > > > __bpf_mptcp_pm_subflow_established,
> > > > + .allow_new_subflow =
> > > > __bpf_mptcp_pm_allow_new_subflow,
> > > > + .accept_new_subflow =
> > > > __bpf_mptcp_pm_accept_new_subflow,
> > > > + .add_addr_echo =
> > > > __bpf_mptcp_pm_add_addr_echo,
> > > > + .add_addr_received =
> > > > __bpf_mptcp_pm_add_addr_received,
> > > > + .rm_addr_received =
> > > > __bpf_mptcp_pm_rm_addr_received,
> > >
> > > Out of curiosity: I see here that even the optional hooks are
> > > assigned:
> >
> > Optional hooks must be assigned here, otherwise this hook cannot be
> > defined in BPF.
>
> OK, thanks!
>
> > > does it mean that all function pointers will never be NULL and
> > > checks
> > > like 'pm->ops->add_addr_received' will always be true with a BPF
> > > PM?
> > > Or
> > > is it still OK to assign them to NULL for a new BPF PM?
> >
> > I think it's the latter, it's OK to assign them to NULL.
>
> If you have the infrastructure ready, can you check if you can set
> add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for
> example please? Also, just to be sure, can you also check that in
> this
> case, in the pm.c, pm->ops->add_addr_received is also set to NULL and
> not to __bpf_mptcp_pm_add_addr_received? (not urgent)
Sure, here's the test:
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index f9fed096d77c..6bdca0dcf21e 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -578,6 +578,9 @@ void mptcp_pm_add_addr_received(const struct sock
*ssk,
pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id,
READ_ONCE(pm->accept_addr));
+ pr_info("%s name=%s, pm->ops->add_addr_received=%p\n",
+ __func__, pm->ops->name, pm->ops->add_addr_received);
+
mptcp_event_addr_announced(ssk, addr);
spin_lock_bh(&pm->lock);
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
index 2f8e0e85b5d7..8aa4b8c9ce33 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
@@ -265,4 +265,5 @@ struct mptcp_pm_ops bpf_userspace = {
.init = (void *)mptcp_pm_userspace_init,
.release = (void *)mptcp_pm_userspace_release,
.name = "bpf_userspace",
+ .add_addr_received = (void *)NULL,
};
And the output:
[ 18.229067][ C0] MPTCP: mptcp_pm_add_addr_received name=kernel,
pm->ops->add_addr_received=00000000cd865d66
[ 18.231316][ C0] MPTCP: mptcp_pm_add_addr_received name=kernel,
pm->ops->add_addr_received=00000000cd865d66
[ 21.105658][ C0] MPTCP: mptcp_pm_add_addr_received
name=bpf_netlink, pm->ops->add_addr_received=00000000fe7b7426
[ 21.106419][ C0] MPTCP: mptcp_pm_add_addr_received
name=bpf_netlink, pm->ops->add_addr_received=00000000fe7b7426
[ 24.767318][ C0] MPTCP: mptcp_pm_add_addr_received
name=userspace, pm->ops->add_addr_received=0000000000000000
[ 28.220824][ C0] MPTCP: mptcp_pm_add_addr_received
name=bpf_userspace, pm->ops->add_addr_received=0000000000000000
[ 36.623859][ C0] MPTCP: mptcp_pm_add_addr_received
name=bpf_hashmap, pm->ops->add_addr_received=0000000000000000
# #187/1 mptcp/connect:OK
# #187/2 mptcp/base:OK
# #187/3 mptcp/mptcpify:OK
# #187/4 mptcp/subflow:OK
# #187/5 mptcp/iters_subflow:OK
# #187/6 mptcp/netlink_pm:OK
# #187/7 mptcp/bpf_netlink_pm:OK
# #187/8 mptcp/userspace_pm:OK
# #187/9 mptcp/bpf_userspace_pm:OK
# #187/10 mptcp/iters_netlink_address:OK
# #187/11 mptcp/iters_userspace_address:OK
# #187/12 mptcp/bpf_hashmap_pm:OK
# #187/13 mptcp/sockopt:OK
# #187/14 mptcp/default:OK
# #187/15 mptcp/first:OK
# #187/16 mptcp/bkup:OK
# #187/17 mptcp/rr:OK
# #187/18 mptcp/red:OK
# #187/19 mptcp/burst:OK
# #187/20 mptcp/stale:OK
# #187 mptcp:OK
pm->ops->add_addr_received is set to NULL indeed, whether we use
".add_addr_received = (void *)NULL," so that it is explicitly set to
NULL, or simply do not assign a new function to it but assign other
function pointers.
Thanks,
-Geliang
>
> Cheers,
> Matt
Hi Geliang, On 25/03/2025 05:15, Geliang Tang wrote: > On Mon, 2025-03-24 at 12:06 +0100, Matthieu Baerts wrote: >> On 24/03/2025 11:43, Geliang Tang wrote: >>> On Mon, 2025-03-24 at 11:26 +0100, Matthieu Baerts wrote: (...) >>>> does it mean that all function pointers will never be NULL and >>>> checks >>>> like 'pm->ops->add_addr_received' will always be true with a BPF >>>> PM? >>>> Or >>>> is it still OK to assign them to NULL for a new BPF PM? >>> >>> I think it's the latter, it's OK to assign them to NULL. >> >> If you have the infrastructure ready, can you check if you can set >> add_addr_received to NULL in a new BPF struct_ops mptcp_pm_ops for >> example please? Also, just to be sure, can you also check that in >> this >> case, in the pm.c, pm->ops->add_addr_received is also set to NULL and >> not to __bpf_mptcp_pm_add_addr_received? (not urgent) > > Sure, here's the test: (...) > pm->ops->add_addr_received is set to NULL indeed, whether we use > ".add_addr_received = (void *)NULL," so that it is explicitly set to > NULL, or simply do not assign a new function to it but assign other > function pointers. Good, thank you for having checked! So we can avoid worker operations (PM), and keeping the MPTCP retransmission callback optional (sched). Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Geliang,
On 21/03/2025 02:49, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> This patch implements a new struct bpf_struct_ops for MPTCP BPF path
> manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager
> in .reg and .unreg.
>
> Add write access for some fields of struct mptcp_sock and struct
> mptcp_pm_addr_entry in .btf_struct_access.
>
> This MPTCP BPF path manager implementation is similar to BPF TCP CC. And
> net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
(...)
> +static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log,
> + const struct bpf_reg_state *reg,
> + int off, int size)
I don't know how it works exactly, but with BPF, can we not force a
program to automatically take a lock (pm->lock) when trying to modify
any of the fields below?
Also, is there really a need for a BPF PM to modify any of these fields
directly?
Are most of them handled either by pm.c before calling a callback or are
specific to the in-kernel PM?
(...)
> +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
> + .get_local_id = __bpf_mptcp_pm_get_local_id,
> + .get_priority = __bpf_mptcp_pm_get_priority,
> + .established = __bpf_mptcp_pm_established,
> + .subflow_established = __bpf_mptcp_pm_subflow_established,
> + .allow_new_subflow = __bpf_mptcp_pm_allow_new_subflow,
> + .accept_new_subflow = __bpf_mptcp_pm_accept_new_subflow,
There is a mix of spaces and tabs here above. Only use tabs?
> + .add_addr_echo = __bpf_mptcp_pm_add_addr_echo,
> + .add_addr_received = __bpf_mptcp_pm_add_addr_received,
> + .rm_addr_received = __bpf_mptcp_pm_rm_addr_received,
> + .init = __bpf_mptcp_pm_init,
> + .release = __bpf_mptcp_pm_release,
> +};
(...)
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
© 2016 - 2026 Red Hat, Inc.