patchew
Login
This small series makes the MPTCP sockopt codepaths consistent with TCP and the core socket layer by using the BPF-aware sockopt_lock_sock()/sockopt_release_sock() helpers introduced in commit 24426654ed3a ("bpf: net: Avoid sk_setsockopt() taking sk lock when called from bpf"). Patch 1 switches all lock_sock()/release_sock()/lock_sock_fast() calls in MPTCP sockopt handlers to use the BPF-aware wrappers, avoiding the risk of sleeping in atomic context when lock_sock_fast() is used. Patch 2 switches ns_capable() to sockopt_ns_capable() in the congestion control setsockopt path, properly handling the case where BPF programs invoke setsockopt from softirq context. Both patches are fixes that should have been part of the original BPF sockopt series. Changelog: v3: - Remove the special symbols in v2. - Use sockopt_ns_capable to replace ns_capable. v2: Link: https://patchwork.kernel.org/project/mptcp/patch/20260422091927.77770-3-gang.yan@linux.dev/ Signed-off-by: Gang Yan <yangang@kylinos.cn> --- Gang Yan (2): mptcp: use sockopt_lock(release)_sock in sockopt mptcp: use sockopt_ns_capable() in setsockopt congestion control net/mptcp/sockopt.c | 123 ++++++++++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 62 deletions(-) --- base-commit: aa15c271d79edde595fb6f4eedb52fbc16325a83 change-id: 20260506-sockopt_lock-c46837d6d9d7 Best regards, -- Gang Yan <yangang@kylinos.cn>
From: Gang Yan <yangang@kylinos.cn> TCP and the core socket layer both use sockopt_lock_sock()/sockopt_release_sock() in their setsockopt and getsockopt handlers. These are BPF-aware wrappers that skip lock acquisition when invoked from a BPF program, where the socket lock is already held. Using lock_sock_fast() on subflows requires extra care: the fast path holds the socket spinlock with BH disabled, creating an atomic context where sleeping is not allowed. Switching to sockopt_lock_sock() avoids the risk of accidentally introducing sleeping operations inside the lock_sock_fast() critical section. Fixes: 24426654ed3a ("bpf: net: Avoid sk_setsockopt() taking sk lock when called from bpf") Signed-off-by: Gang Yan <yangang@kylinos.cn> --- net/mptcp/sockopt.c | 121 ++++++++++++++++++++++++++-------------------------- 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - lock_sock(sk); + sockopt_lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + sockopt_lock_sock(ssk); switch (optname) { case SO_DEBUG: @@ -XXX,XX +XXX,XX @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in } subflow->setsockopt_seq = msk->setsockopt_seq; - unlock_sock_fast(ssk, slow); + sockopt_release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); } static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam if (ret) return ret; - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct 
sock *ssk = mptcp_subflow_tcp_sock(subflow); - lock_sock(ssk); + sockopt_lock_sock(ssk); sock_set_timestamp(ssk, optname, !!val); - release_sock(ssk); + sockopt_release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, if (ret) return ret; - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - lock_sock(ssk); + sockopt_lock_sock(ssk); sock_set_timestamping(ssk, optname, timestamping); - release_sock(ssk); + sockopt_release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t if (ret) return ret; - lock_sock(sk); + sockopt_lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + sockopt_lock_sock(ssk); if (!ling.l_onoff) { sock_reset_flag(ssk, SOCK_LINGER); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t } subflow->setsockopt_seq = msk->setsockopt_seq; - unlock_sock_fast(ssk, slow); + sockopt_release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_REUSEADDR: case SO_BINDTODEVICE: case SO_BINDTOIFINDEX: - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { - release_sock(sk); + sockopt_release_sock(sk); return PTR_ERR(ssk); } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, else if (optname == SO_BINDTOIFINDEX) sk->sk_bound_dev_if = ssk->sk_bound_dev_if; } - release_sock(sk); + sockopt_release_sock(sk); return ret; case SO_KEEPALIVE: case SO_PRIORITY: @@ -XXX,XX +XXX,XX @@ static 
int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, case IPV6_V6ONLY: case IPV6_TRANSPARENT: case IPV6_FREEBIND: - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { - release_sock(sk); + sockopt_release_sock(sk); return PTR_ERR(ssk); } ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); if (ret != 0) { - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, break; } - release_sock(sk); + sockopt_release_sock(sk); break; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); ret = 0; - lock_sock(sk); + sockopt_lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int err; - lock_sock(ssk); + sockopt_lock_sock(ssk); err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); if (err < 0 && ret == 0) ret = err; subflow->setsockopt_seq = msk->setsockopt_seq; - release_sock(ssk); + sockopt_release_sock(ssk); } if (ret == 0) strscpy(msk->ca_name, name, sizeof(msk->ca_name)); - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int ret; - lock_sock(ssk); + sockopt_lock_sock(ssk); ret = set_val(ssk, val); err = err ? 
: ret; - release_sock(ssk); + sockopt_release_sock(ssk); } if (!err) { @@ -XXX,XX +XXX,XX @@ static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - lock_sock(ssk); + sockopt_lock_sock(ssk); __tcp_sock_set_cork(ssk, !!val); - release_sock(ssk); + sockopt_release_sock(ssk); } if (!val) mptcp_check_and_set_pending(sk); @@ -XXX,XX +XXX,XX @@ static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - lock_sock(ssk); + sockopt_lock_sock(ssk); __tcp_sock_set_nodelay(ssk, !!val); - release_sock(ssk); + sockopt_release_sock(ssk); } if (val) mptcp_check_and_set_pending(sk); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, if (err != 0) return err; - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { - release_sock(sk); + sockopt_release_sock(sk); return PTR_ERR(ssk); } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, READ_ONCE(inet_sk(sk)->local_port_range)); break; default: - release_sock(sk); + sockopt_release_sock(sk); WARN_ON_ONCE(1); return -EOPNOTSUPP; } sockopt_seq_inc(msk); - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, if (err != 0) return err; - lock_sock(sk); + sockopt_lock_sock(sk); sockopt_seq_inc(msk); val = READ_ONCE(inet_sk(sk)->tos); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow; - slow = lock_sock_fast(ssk); + sockopt_lock_sock(ssk); __ip_sock_set_tos(ssk, val); - unlock_sock_fast(ssk, slow); + sockopt_release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int 
mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int int ret; /* Limit to first subflow, before the connection establishment */ - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { ret = PTR_ERR(ssk); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int ret = tcp_setsockopt(ssk, level, optname, optval, optlen); unlock: - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, if (ret) return ret; - lock_sock(sk); + sockopt_lock_sock(sk); switch (optname) { case TCP_INQ: if (val < 0 || val > 1) @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, ret = -ENOPROTOOPT; } - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, * is in TCP fallback, when TCP socket options are passed through * to the one remaining subflow. 
*/ - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_tcp_fallback(msk); - release_sock(sk); + sockopt_release_sock(sk); if (ssk) return tcp_setsockopt(ssk, level, optname, optval, optlen); @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int struct sock *ssk; int ret; - lock_sock(sk); + sockopt_lock_sock(sk); ssk = msk->first; if (ssk) goto get; @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int ret = tcp_getsockopt(ssk, level, optname, optval, optlen); out: - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, infoptr = optval + sfd.size_subflow_data; - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, tcp_get_info(ssk, &info); if (copy_to_user(infoptr, &info, sfd.size_user)) { - release_sock(sk); + sockopt_release_sock(sk); return -EFAULT; } @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, } } - release_sock(sk); + sockopt_release_sock(sk); sfd.num_subflows = sfcount; @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o addrptr = optval + sfd.size_subflow_data; - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o mptcp_get_sub_addrs(ssk, &a); if (copy_to_user(addrptr, &a, sfd.size_user)) { - release_sock(sk); + sockopt_release_sock(sk); return -EFAULT; } @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o } } - release_sock(sk); + 
sockopt_release_sock(sk); sfd.num_subflows = sfcount; @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optva sizeof(struct mptcp_subflow_info)); tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_subflow_info sfinfo; @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optva tcpinfoptr += mfi.size_tcpinfo_user; sfinfoptr += mfi.size_sfinfo_user; } - release_sock(sk); + sockopt_release_sock(sk); mfi.num_subflows = sfcount; if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optva return 0; fail_release: - release_sock(sk); + sockopt_release_sock(sk); return -EFAULT; } @@ -XXX,XX +XXX,XX @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, * is in TCP fallback, when socket options are passed through * to the one remaining subflow. */ - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_tcp_fallback(msk); - release_sock(sk); + sockopt_release_sock(sk); if (ssk) return tcp_getsockopt(ssk, level, optname, optval, option); -- 2.43.0
From: Gang Yan <yangang@kylinos.cn> When a BPF program calls bpf_setsockopt(), it may run in softirq context where ns_capable() is not appropriate as there is no valid credential context. Use sockopt_ns_capable() instead, which skips the capability check when invoked from a BPF program. Fixes: e42c7beee71d ("bpf: net: Consider has_current_bpf_ctx() when testing capable() in sk_setsockopt()") Signed-off-by: Gang Yan <yangang@kylinos.cn> --- net/mptcp/sockopt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t name[ret] = 0; - cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); + cap_net_admin = sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); ret = 0; sockopt_lock_sock(sk); -- 2.43.0
This small series makes the MPTCP sockopt codepaths consistent with TCP and the core socket layer by using the BPF-aware sockopt_lock_sock()/sockopt_release_sock() helpers introduced in commit 24426654ed3a ("bpf: net: Avoid sk_setsockopt() taking sk lock when called from bpf"). Patch 1 switches the lock_sock()/release_sock() calls on the msk in MPTCP sockopt handlers to the BPF-aware wrappers, and replaces lock_sock_fast()/unlock_sock_fast() on subflows with plain lock_sock()/release_sock(), avoiding the risk of sleeping in atomic context when lock_sock_fast() is used. Patch 2 switches ns_capable() to sockopt_ns_capable() in the congestion control setsockopt path, properly handling the case where BPF programs invoke setsockopt from softirq context, and passes '!has_current_bpf_ctx()' instead of 'true' as the 'load' argument of tcp_set_congestion_control(). Both patches are fixes that should have been part of the original BPF sockopt series. Changelog: v4: - As sashiko pointed out, when processing BPF setsockopt requests the msk is already locked, but we still need lock_sock() to protect the ssk. If we used sockopt_lock_sock(ssk), it would return without acquiring the lock. - In 'mptcp_setsockopt_sol_tcp_congestion', the 'load' argument of 'tcp_set_congestion_control' is changed from 'true' to '!has_current_bpf_ctx()' like TCP does. This determines whether tcp_ca_find() or tcp_ca_find_autoload() is called. I agree we should stay consistent with the TCP implementation. v3: - Remove the special symbols in v2. - Use sockopt_ns_capable to replace ns_capable. v2: Link: https://patchwork.kernel.org/project/mptcp/patch/20260422091927.77770-3-gang.yan@linux.dev/ Signed-off-by: Gang Yan <yangang@kylinos.cn> --- Changes in v4: - Keep plain lock_sock()/release_sock() for subflows; use the sockopt_*() wrappers only on the msk. - Pass '!has_current_bpf_ctx()' as the 'load' argument of tcp_set_congestion_control(), like TCP does.
- Link to v3: https://lore.kernel.org/r/20260506-sockopt_lock-v3-0-06bd417c6d63@kylinos.cn --- Gang Yan (2): mptcp: use sockopt_lock(release)_sock in sockopt mptcp: use sockopt_ns_capable() in setsockopt congestion control net/mptcp/sockopt.c | 101 ++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 51 deletions(-) --- base-commit: aa15c271d79edde595fb6f4eedb52fbc16325a83 change-id: 20260506-sockopt_lock-c46837d6d9d7 Best regards, -- Gang Yan <yangang@kylinos.cn>
From: Gang Yan <yangang@kylinos.cn> TCP and the core socket layer both use sockopt_lock_sock()/sockopt_release_sock() in their setsockopt and getsockopt handlers. These are BPF-aware wrappers that skip lock acquisition when invoked from a BPF program, where the socket lock is already held. Use them for the msk. The subflow sockets keep plain lock_sock()/release_sock(): when called from a BPF program the msk is already locked, but sockopt_lock_sock(ssk) would return without acquiring the subflow lock. Using lock_sock_fast() on subflows requires extra care: the fast path holds the socket spinlock with BH disabled, creating an atomic context where sleeping is not allowed. Switching to lock_sock() avoids the risk of accidentally introducing sleeping operations inside the lock_sock_fast() critical section. Fixes: 24426654ed3a ("bpf: net: Avoid sk_setsockopt() taking sk lock when called from bpf") Signed-off-by: Gang Yan <yangang@kylinos.cn> --- net/mptcp/sockopt.c | 97 ++++++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - lock_sock(sk); + sockopt_lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + lock_sock(ssk); switch (optname) { case SO_DEBUG: @@ -XXX,XX +XXX,XX @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in } subflow->setsockopt_seq = msk->setsockopt_seq; - unlock_sock_fast(ssk, slow); + release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); } static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam if (ret) return ret; - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = 
mptcp_subflow_tcp_sock(subflow); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, if (ret) return ret; - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t if (ret) return ret; - lock_sock(sk); + sockopt_lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow = lock_sock_fast(ssk); + lock_sock(ssk); if (!ling.l_onoff) { sock_reset_flag(ssk, SOCK_LINGER); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t } subflow->setsockopt_seq = msk->setsockopt_seq; - unlock_sock_fast(ssk, slow); + release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_REUSEADDR: case SO_BINDTODEVICE: case SO_BINDTOIFINDEX: - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { - release_sock(sk); + sockopt_release_sock(sk); return PTR_ERR(ssk); } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, else if (optname == SO_BINDTOIFINDEX) sk->sk_bound_dev_if = ssk->sk_bound_dev_if; } - release_sock(sk); + sockopt_release_sock(sk); return ret; case SO_KEEPALIVE: case SO_PRIORITY: @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, case 
IPV6_V6ONLY: case IPV6_TRANSPARENT: case IPV6_FREEBIND: - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { - release_sock(sk); + sockopt_release_sock(sk); return PTR_ERR(ssk); } ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); if (ret != 0) { - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, break; } - release_sock(sk); + sockopt_release_sock(sk); break; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); ret = 0; - lock_sock(sk); + sockopt_lock_sock(sk); sockopt_seq_inc(msk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t if (ret == 0) strscpy(msk->ca_name, name, sizeof(msk->ca_name)); - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, if (err != 0) return err; - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { - release_sock(sk); + sockopt_release_sock(sk); return PTR_ERR(ssk); } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, READ_ONCE(inet_sk(sk)->local_port_range)); break; default: - release_sock(sk); + sockopt_release_sock(sk); WARN_ON_ONCE(1); return -EOPNOTSUPP; } sockopt_seq_inc(msk); - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, if (err != 0) return err; - lock_sock(sk); + sockopt_lock_sock(sk); sockopt_seq_inc(msk); val = READ_ONCE(inet_sk(sk)->tos); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 
- bool slow; - slow = lock_sock_fast(ssk); + lock_sock(ssk); __ip_sock_set_tos(ssk, val); - unlock_sock_fast(ssk, slow); + release_sock(ssk); } - release_sock(sk); + sockopt_release_sock(sk); return 0; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int int ret; /* Limit to first subflow, before the connection establishment */ - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { ret = PTR_ERR(ssk); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int ret = tcp_setsockopt(ssk, level, optname, optval, optlen); unlock: - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, if (ret) return ret; - lock_sock(sk); + sockopt_lock_sock(sk); switch (optname) { case TCP_INQ: if (val < 0 || val > 1) @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, ret = -ENOPROTOOPT; } - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, * is in TCP fallback, when TCP socket options are passed through * to the one remaining subflow. 
*/ - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_tcp_fallback(msk); - release_sock(sk); + sockopt_release_sock(sk); if (ssk) return tcp_setsockopt(ssk, level, optname, optval, optlen); @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int struct sock *ssk; int ret; - lock_sock(sk); + sockopt_lock_sock(sk); ssk = msk->first; if (ssk) goto get; @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int ret = tcp_getsockopt(ssk, level, optname, optval, optlen); out: - release_sock(sk); + sockopt_release_sock(sk); return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, infoptr = optval + sfd.size_subflow_data; - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, tcp_get_info(ssk, &info); if (copy_to_user(infoptr, &info, sfd.size_user)) { - release_sock(sk); + sockopt_release_sock(sk); return -EFAULT; } @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, } } - release_sock(sk); + sockopt_release_sock(sk); sfd.num_subflows = sfcount; @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o addrptr = optval + sfd.size_subflow_data; - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o mptcp_get_sub_addrs(ssk, &a); if (copy_to_user(addrptr, &a, sfd.size_user)) { - release_sock(sk); + sockopt_release_sock(sk); return -EFAULT; } @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o } } - release_sock(sk); + 
sockopt_release_sock(sk); sfd.num_subflows = sfcount; @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optva sizeof(struct mptcp_subflow_info)); tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); - lock_sock(sk); + sockopt_lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_subflow_info sfinfo; @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optva tcpinfoptr += mfi.size_tcpinfo_user; sfinfoptr += mfi.size_sfinfo_user; } - release_sock(sk); + sockopt_release_sock(sk); mfi.num_subflows = sfcount; if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optva return 0; fail_release: - release_sock(sk); + sockopt_release_sock(sk); return -EFAULT; } @@ -XXX,XX +XXX,XX @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, * is in TCP fallback, when socket options are passed through * to the one remaining subflow. */ - lock_sock(sk); + sockopt_lock_sock(sk); ssk = __mptcp_tcp_fallback(msk); - release_sock(sk); + sockopt_release_sock(sk); if (ssk) return tcp_getsockopt(ssk, level, optname, optval, option); -- 2.43.0
From: Gang Yan <yangang@kylinos.cn> When a BPF program calls bpf_setsockopt(), it may run in softirq context where ns_capable() is not appropriate as there is no valid credential context. Use sockopt_ns_capable() instead, which skips the capability check when invoked from a BPF program. Additionally, the 'load' argument passed to tcp_set_congestion_control() is changed from 'true' to '!has_current_bpf_ctx()', like TCP does; it determines whether tcp_ca_find() or tcp_ca_find_autoload() is called. Fixes: e42c7beee71d ("bpf: net: Consider has_current_bpf_ctx() when testing capable() in sk_setsockopt()") Signed-off-by: Gang Yan <yangang@kylinos.cn> --- net/mptcp/sockopt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t name[ret] = 0; - cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); + cap_net_admin = sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); ret = 0; sockopt_lock_sock(sk); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t int err; lock_sock(ssk); - err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); + err = tcp_set_congestion_control(ssk, name, !has_current_bpf_ctx(), cap_net_admin); if (err < 0 && ret == 0) ret = err; subflow->setsockopt_seq = msk->setsockopt_seq; -- 2.43.0