:p
atchew
Login
From: Geliang Tang <tanggeliang@kylinos.cn> RESEND: - Rebased. - Resent to trigger AI review. v2: - Wrap mptcp_bpf_update_proto in CONFIG_BPF_SYSCALL to fix the compilation errors reported by the kernel test robot. - Add checks for IS_ENABLED(CONFIG_MPTCP_IPV6). Implement psock_update_sk_prot for basic MPTCP BPF sockmap support. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/521 Geliang Tang (2): mptcp: implement psock_update_sk_prot selftests/bpf: Update sockmap tests for MPTCP net/mptcp/protocol.c | 105 ++++++++++++++++++ .../testing/selftests/bpf/prog_tests/mptcp.c | 4 +- 2 files changed, 107 insertions(+), 2 deletions(-) -- 2.53.0
From: Geliang Tang <tanggeliang@kylinos.cn> Add MPTCP support for BPF sockmap by implementing psock_update_sk_prot callback. This allows MPTCP sockets to dynamically switch protocol handlers when attached to or detached from sockmap programs. Separate protocol structures are maintained for IPv4/IPv6 and TX/RX configurations. tcp_bpf_update_proto() in net/ipv4/tcp_bpf.c is a frame of reference for this patch. Reported-by: kernel test robot <lkp@intel.com> Closes: https://lore.kernel.org/oe-kbuild-all/202512261144.DxrvwMS3-lkp@intel.com/ Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/521 Cc: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/protocol.c | 105 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> +#include <linux/skmsg.h> #include <net/aligned_data.h> #include <net/rps.h> #include <net/sock.h> @@ -XXX,XX +XXX,XX @@ static int mptcp_connect(struct sock *sk, struct sockaddr_unsized *uaddr, return 0; } +#ifdef CONFIG_BPF_SYSCALL +enum { + MPTCP_BPF_IPV4, + MPTCP_BPF_IPV6, + MPTCP_BPF_NUM_PROTS, +}; + +enum { + MPTCP_BPF_BASE, + MPTCP_BPF_TX, + MPTCP_BPF_RX, + MPTCP_BPF_TXRX, + MPTCP_BPF_NUM_CFGS, +}; + +static struct proto mptcp_bpf_prots[MPTCP_BPF_NUM_PROTS][MPTCP_BPF_NUM_CFGS]; + +static void mptcp_bpf_rebuild_protos(struct proto prot[MPTCP_BPF_NUM_CFGS], + struct proto *base) +{ + prot[MPTCP_BPF_BASE] = *base; + prot[MPTCP_BPF_BASE].destroy = sock_map_destroy; + prot[MPTCP_BPF_BASE].close = sock_map_close; + prot[MPTCP_BPF_BASE].sock_is_readable = sk_msg_is_readable; + + prot[MPTCP_BPF_TX] = prot[MPTCP_BPF_BASE]; + prot[MPTCP_BPF_RX] = prot[MPTCP_BPF_BASE]; + prot[MPTCP_BPF_TXRX] = prot[MPTCP_BPF_TX]; +} + +#if 
IS_ENABLED(CONFIG_MPTCP_IPV6) +static struct proto *mptcpv6_prot_saved __read_mostly; +static DEFINE_SPINLOCK(mptcpv6_prot_lock); + +static void mptcp_bpf_check_v6_needs_rebuild(struct proto *ops) +{ + /* + * Load with acquire semantics to ensure we see the latest protocol + * structure before checking for rebuild. + */ + if (unlikely(ops != smp_load_acquire(&mptcpv6_prot_saved))) { + spin_lock_bh(&mptcpv6_prot_lock); + if (likely(ops != mptcpv6_prot_saved)) { + mptcp_bpf_rebuild_protos(mptcp_bpf_prots[MPTCP_BPF_IPV6], ops); + /* Ensure mptcpv6_prot_saved update is visible before releasing lock */ + smp_store_release(&mptcpv6_prot_saved, ops); + } + spin_unlock_bh(&mptcpv6_prot_lock); + } +} + +static int mptcp_bpf_assert_proto_ops(struct proto *ops) +{ + /* In order to avoid retpoline, we make assumptions when we call + * into ops if e.g. a psock is not present. Make sure they are + * indeed valid assumptions. + */ + return ops->recvmsg == mptcp_recvmsg && + ops->sendmsg == mptcp_sendmsg ? 0 : -EOPNOTSUPP; +} +#endif + +static int mptcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +{ + int family = sk->sk_family == AF_INET6 ? MPTCP_BPF_IPV6 : MPTCP_BPF_IPV4; + int config = psock->progs.msg_parser ? MPTCP_BPF_TX : MPTCP_BPF_BASE; + + if (psock->progs.stream_verdict || psock->progs.skb_verdict) + config = (config == MPTCP_BPF_TX) ? 
MPTCP_BPF_TXRX : MPTCP_BPF_RX; + + if (restore) { + sk->sk_write_space = psock->saved_write_space; + /* Pairs with lockless read in sk_clone_lock() */ + sock_replace_proto(sk, psock->sk_proto); + return 0; + } + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (sk->sk_family == AF_INET6) { + if (mptcp_bpf_assert_proto_ops(psock->sk_proto)) + return -EINVAL; + + mptcp_bpf_check_v6_needs_rebuild(psock->sk_proto); + } +#endif + + /* Pairs with lockless read in sk_clone_lock() */ + sock_replace_proto(sk, &mptcp_bpf_prots[family][config]); + return 0; +} +#endif + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -XXX,XX +XXX,XX @@ static struct proto mptcp_prot = { .obj_size = sizeof(struct mptcp_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, .no_autobind = true, +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = mptcp_bpf_update_proto, +#endif }; +#ifdef CONFIG_BPF_SYSCALL +static int __init mptcp_bpf_v4_build_proto(void) +{ + mptcp_bpf_rebuild_protos(mptcp_bpf_prots[MPTCP_BPF_IPV4], &mptcp_prot); + return 0; +} +late_initcall(mptcp_bpf_v4_build_proto); +#endif + static int mptcp_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct mptcp_sock *msk = mptcp_sk(sock->sk); -- 2.53.0
From: Geliang Tang <tanggeliang@kylinos.cn> Update sockmap tests to reflect new MPTCP support. MPTCP sockets are now allowed in sockmap, so test expectations are adjusted accordingly. The server socket is inserted at key 0 and is expected to succeed; the client update reuses key 0 with BPF_NOEXIST, so it is now expected to fail with -EEXIST instead of -EOPNOTSUPP. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) server_fd = accept(listen_fd, NULL, 0); err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), &zero, &server_fd, BPF_NOEXIST); - if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed")) + if (!ASSERT_EQ(err, 0, "server should be allowed")) goto end; /* MPTCP client should also be disallowed */ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), &zero, &client_fd1, BPF_NOEXIST); - if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed")) + if (!ASSERT_EQ(err, -EEXIST, "client should be allowed")) goto end; end: if (client_fd1 >= 0) -- 2.53.0
From: Geliang Tang <tanggeliang@kylinos.cn> v3: - Address the comments from the AI review. - Use WRITE_ONCE to set sk->sk_write_space. - Update the selftest to use a new key for the client socket. - Fix line length warnings. RESEND: - Rebased. - Resent to trigger AI review. v2: - Wrap mptcp_bpf_update_proto in CONFIG_BPF_SYSCALL to fix the compilation errors reported by the kernel test robot. - Add checks for IS_ENABLED(CONFIG_MPTCP_IPV6). Implement psock_update_sk_prot for basic MPTCP BPF sockmap support. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/521 Geliang Tang (2): mptcp: implement psock_update_sk_prot selftests/bpf: Update sockmap tests for MPTCP net/mptcp/protocol.c | 113 ++++++++++++++++++ .../testing/selftests/bpf/prog_tests/mptcp.c | 16 +-- 2 files changed, 121 insertions(+), 8 deletions(-) -- 2.51.0
From: Geliang Tang <tanggeliang@kylinos.cn> Add MPTCP support for BPF sockmap by implementing psock_update_sk_prot callback. This allows MPTCP sockets to dynamically switch protocol handlers when attached to or detached from sockmap programs. Separate protocol structures are maintained for IPv4/IPv6 and TX/RX configurations. tcp_bpf_update_proto() in net/ipv4/tcp_bpf.c is a frame of reference for this patch. Reported-by: kernel test robot <lkp@intel.com> Closes: https://lore.kernel.org/oe-kbuild-all/202512261144.DxrvwMS3-lkp@intel.com/ Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/521 Cc: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/protocol.c | 113 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> +#include <linux/skmsg.h> #include <net/aligned_data.h> #include <net/rps.h> #include <net/sock.h> @@ -XXX,XX +XXX,XX @@ static int mptcp_connect(struct sock *sk, struct sockaddr_unsized *uaddr, return 0; } +#ifdef CONFIG_BPF_SYSCALL +enum { + MPTCP_BPF_IPV4, + MPTCP_BPF_IPV6, + MPTCP_BPF_NUM_PROTS, +}; + +enum { + MPTCP_BPF_BASE, + MPTCP_BPF_TX, + MPTCP_BPF_RX, + MPTCP_BPF_TXRX, + MPTCP_BPF_NUM_CFGS, +}; + +static struct proto mptcp_bpf_prots[MPTCP_BPF_NUM_PROTS][MPTCP_BPF_NUM_CFGS]; + +static void mptcp_bpf_rebuild_protos(struct proto prot[MPTCP_BPF_NUM_CFGS], + struct proto *base) +{ + prot[MPTCP_BPF_BASE] = *base; + prot[MPTCP_BPF_BASE].destroy = sock_map_destroy; + prot[MPTCP_BPF_BASE].close = sock_map_close; + prot[MPTCP_BPF_BASE].sock_is_readable = sk_msg_is_readable; + + prot[MPTCP_BPF_TX] = prot[MPTCP_BPF_BASE]; + prot[MPTCP_BPF_RX] = prot[MPTCP_BPF_BASE]; + prot[MPTCP_BPF_TXRX] = prot[MPTCP_BPF_TX]; +} + +#if 
IS_ENABLED(CONFIG_MPTCP_IPV6) +static struct proto *mptcpv6_prot_saved __read_mostly; +static DEFINE_SPINLOCK(mptcpv6_prot_lock); + +static void mptcp_bpf_check_v6_needs_rebuild(struct proto *ops) +{ + /* + * Load with acquire semantics to ensure we see the latest protocol + * structure before checking for rebuild. + */ + if (unlikely(ops != smp_load_acquire(&mptcpv6_prot_saved))) { + spin_lock_bh(&mptcpv6_prot_lock); + if (likely(ops != mptcpv6_prot_saved)) { + mptcp_bpf_rebuild_protos(mptcp_bpf_prots[MPTCP_BPF_IPV6], + ops); + /* Ensure mptcpv6_prot_saved update is visible before + * releasing lock + */ + smp_store_release(&mptcpv6_prot_saved, ops); + } + spin_unlock_bh(&mptcpv6_prot_lock); + } +} + +static int mptcp_bpf_assert_proto_ops(struct proto *ops) +{ + /* In order to avoid retpoline, we make assumptions when we call + * into ops if e.g. a psock is not present. Make sure they are + * indeed valid assumptions. + */ + return ops->recvmsg == mptcp_recvmsg && + ops->sendmsg == mptcp_sendmsg ? 0 : -EOPNOTSUPP; +} +#endif + +static int mptcp_bpf_update_proto(struct sock *sk, + struct sk_psock *psock, + bool restore) +{ + int family = sk->sk_family == AF_INET6 ? MPTCP_BPF_IPV6 : + MPTCP_BPF_IPV4; + int config = psock->progs.msg_parser ? MPTCP_BPF_TX : + MPTCP_BPF_BASE; + + if (psock->progs.stream_verdict || psock->progs.skb_verdict) + config = (config == MPTCP_BPF_TX) ? 
MPTCP_BPF_TXRX : + MPTCP_BPF_RX; + + if (restore) { + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); + /* Pairs with lockless read in sk_clone_lock() */ + sock_replace_proto(sk, psock->sk_proto); + return 0; + } + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (sk->sk_family == AF_INET6) { + if (mptcp_bpf_assert_proto_ops(psock->sk_proto)) + return -EINVAL; + + mptcp_bpf_check_v6_needs_rebuild(psock->sk_proto); + } +#endif + + /* Pairs with lockless read in sk_clone_lock() */ + sock_replace_proto(sk, &mptcp_bpf_prots[family][config]); + return 0; +} +#endif + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -XXX,XX +XXX,XX @@ static struct proto mptcp_prot = { .obj_size = sizeof(struct mptcp_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, .no_autobind = true, +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = mptcp_bpf_update_proto, +#endif }; +#ifdef CONFIG_BPF_SYSCALL +static int __init mptcp_bpf_v4_build_proto(void) +{ + mptcp_bpf_rebuild_protos(mptcp_bpf_prots[MPTCP_BPF_IPV4], &mptcp_prot); + return 0; +} +late_initcall(mptcp_bpf_v4_build_proto); +#endif + static int mptcp_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct mptcp_sock *msk = mptcp_sk(sock->sk); -- 2.51.0
From: Geliang Tang <tanggeliang@kylinos.cn> Update sockmap tests to reflect new MPTCP support. MPTCP sockets are now allowed in sockmap, so test expectations are adjusted accordingly. Use a different key (1) for MPTCP client sockets to validate sockmap. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel) close(listen_fd); } -/* Test sockmap rejection of MPTCP sockets - both server and client sides. */ -static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) +/* Test sockmap of MPTCP sockets - both server and client sides. */ +static void test_sockmap_mptcp_support(struct mptcp_sockmap *skel) { int listen_fd = -1, server_fd = -1, client_fd1 = -1; - int err, zero = 0; + int err, zero = 0, one = 1; /* start server with MPTCP enabled */ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); @@ -XXX,XX +XXX,XX @@ static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) server_fd = accept(listen_fd, NULL, 0); err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), &zero, &server_fd, BPF_NOEXIST); - if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed")) + if (!ASSERT_EQ(err, 0, "server should be allowed")) goto end; - /* MPTCP client should also be disallowed */ + /* MPTCP client should also be allowed */ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), - &zero, &client_fd1, BPF_NOEXIST); - if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed")) + &one, &client_fd1, BPF_NOEXIST); + if (!ASSERT_EQ(err, 0, "client should be allowed")) goto end; end: if (client_fd1 >= 0) 
@@ -XXX,XX +XXX,XX @@ static void test_mptcp_sockmap(void) goto close_netns; test_sockmap_with_mptcp_fallback(skel); - test_sockmap_reject_mptcp(skel); + test_sockmap_mptcp_support(skel); close_netns: netns_free(netns); -- 2.51.0