This is the refactor needed for the selinux fixes, as discussed on the ML. The main change, introduced by patch 6/9, consists in moving the first subflow initialization from the msk init callback into the mptcp syscalls needing such data (namely: bind, listen, connect). Patches 4 and 5 make such change smaller. Patches 7, 8, 9 are not strictly needed, but are nice-to-have follow-ups cleaning up the related code. Specifically, patch 8 closes issues/290, but requires some additional pre-reqs (patches 1-3), which in turn are also nice to have IMHO. Sharing after little testing to get feedback and to let the bot massage the new code: patches 1 and 7 can have subtle effects, so I would like to have syzkaller digest them for a while. I'll rebase the selinux patches on top and share them soon. Paolo Abeni (9): mptcp: refactor passive socket initialization. mptcp: drop unneeded argument mptcp: drop legacy code. mptcp: avoid unneeded __mptcp_nmpc_socket() usage mptcp: move fastopen subflow check inside mptcp_sendmsg_fastopen() mptcp: move first subflow allocation at mpc access time mptcp: do not keep around the first subflow after disconnect. mptcp: fastclose msk when cleaning unaccepted sockets mptcp: refactor mptcp_stream_accept() net/mptcp/options.c | 9 +-- net/mptcp/pm.c | 4 +- net/mptcp/pm_netlink.c | 4 +- net/mptcp/protocol.c | 159 ++++++++++++++++++++++------------- net/mptcp/protocol.h | 4 +- net/mptcp/sockopt.c | 18 ++--- net/mptcp/subflow.c | 40 +++++++---- 7 files changed, 132 insertions(+), 106 deletions(-) -- 2.38.1
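For context, the three syscalls named above are the plain socket calls issued on an IPPROTO_MPTCP socket. A minimal userspace sketch of the affected entry points (the port and the fallback define for IPPROTO_MPTCP are illustrative assumptions, not part of the series):

  #include <netinet/in.h>
  #include <stdio.h>
  #include <sys/socket.h>
  #include <unistd.h>

  #ifndef IPPROTO_MPTCP
  #define IPPROTO_MPTCP 262	/* value from linux/in.h */
  #endif

  int main(void)
  {
  	struct sockaddr_in addr = {
  		.sin_family = AF_INET,
  		.sin_port = htons(8080),	/* arbitrary example port */
  		.sin_addr.s_addr = htonl(INADDR_ANY),
  	};
  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

  	if (fd < 0) {
  		perror("socket");	/* msk created; no subflow needed yet */
  		return 1;
  	}
  	/* after patch 6/9, bind()/listen() (and connect() for clients) are
  	 * the points where the kernel allocates the first subflow on demand
  	 */
  	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
  	    listen(fd, 128) < 0) {
  		perror("bind/listen");
  		close(fd);
  		return 1;
  	}
  	close(fd);
  	return 0;
  }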
After commit 30e51b923e43 ("mptcp: fix unreleased socket in accept queue"), unaccepted msk sockets go through a complete shutdown, so we no longer need to delay inserting the first subflow into the subflow list. The reference counting deserves some extra care, as __mptcp_close() is unaware of the request socket linkage to the first subflow. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- Notes: - this schema assumes that the TCP code will never drop a request socket from the receive queue after the 3whs. I tried to verify such assumption as strictly as I could, but more eyes are more than welcome! - this will cause a packetdrill failure for close_before_accept.pkt, because the msk will become fully established before accept() - imho a good thing - and send out add_addr earlier. The packetdrill test change should be easy. --- net/mptcp/protocol.c | 17 ----------------- net/mptcp/subflow.c | 31 ++++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); - mptcp_propagate_sndbuf((struct sock *)msk, ssk); mptcp_sockopt_sync_locked(msk, ssk); return true; } @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, lock_sock(newsk); - /* PM/worker can now acquire the first subflow socket - * lock without racing with listener queue cleanup, - * we can notify it, if needed. - * - * Even if remote has reset the initial subflow by now - * the refcnt is still at least one. - */ - subflow = mptcp_subflow_ctx(msk->first); - list_add(&subflow->node, &msk->conn_list); - sock_hold(msk->first); - if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); - - mptcp_rcv_space_init(msk, msk->first); - mptcp_propagate_sndbuf(newsk, msk->first); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack.
*/ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; + struct mptcp_sock *owner; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { + owner = mptcp_sk(new_msk); + /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list */ - WRITE_ONCE(mptcp_sk(new_msk)->first, child); + WRITE_ONCE(owner->first, child); /* new mpc subflow takes ownership of the newly * created mptcp socket */ mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; - mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); - mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); + mptcp_pm_new_connection(owner, child, 1); + mptcp_token_accept(subflow_req, owner); ctx->conn = new_msk; new_msk = NULL; @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * uses the correct data */ mptcp_copy_inaddrs(ctx->conn, child); + mptcp_propagate_sndbuf(ctx->conn, child); + + mptcp_rcv_space_init(owner, child); + list_add(&ctx->node, &owner->conn_list); + sock_hold(child); /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); + mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + ctx->pm_notified = 1; + } } else if (ctx->mp_join) { - struct mptcp_sock *owner; - owner = subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s sock_hold(sk); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; - msk->first = NULL; msk->dl_next = NULL; + /* The upcoming mptcp_close is going to drop all the references + * to the first subflow, ignoring that one of such reference is + * owned by the request socket still in the accept queue and that + * later inet_csk_listen_stop will drop it. + * Acquire an extra reference here to avoid an UaF at that point. + */ + if (msk->first) + sock_hold(msk->first); + do_cancel_work = __mptcp_close(sk, 0); release_sock(sk); if (do_cancel_work) { -- 2.38.1
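The extra sock_hold() above is the crux of the refcounting note in the commit message. A self-contained userspace model of the same idea - the struct, helpers and printf are illustrative, not kernel API - shows why the pin is needed when two independent paths each drop a reference to the same object:

  #include <stdio.h>
  #include <stdlib.h>

  struct obj {
  	int refcnt;
  };

  static void obj_hold(struct obj *o) { o->refcnt++; }

  static void obj_put(struct obj *o)
  {
  	if (--o->refcnt == 0) {
  		printf("freeing obj\n");
  		free(o);
  	}
  }

  int main(void)
  {
  	struct obj *first = malloc(sizeof(*first));

  	first->refcnt = 1;	/* reference owned by the request socket */

  	obj_hold(first);	/* the fix: pin before __mptcp_close() runs */
  	obj_put(first);		/* __mptcp_close() drops the msk-side ref */
  	obj_put(first);		/* inet_csk_listen_stop() drops the last one;
  				 * without the earlier hold this second put
  				 * would hit already-freed memory (UaF)
  				 */
  	return 0;
  }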
After the previous commit every mptcp_pm_fully_established() is always invoked with a GFP_ATOMIC argument. We can drop it. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/options.c | 2 +- net/mptcp/pm.c | 4 ++-- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk); } else { - mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); + mptcp_pm_fully_established(msk, ssk); } return true; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -XXX,XX +XXX,XX @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, return true; } -void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) { struct mptcp_pm_data *pm = &msk->pm; bool announce = false; @@ -XXX,XX +XXX,XX @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, spin_unlock_bh(&pm->lock); if (announce) - mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); + mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC); } void mptcp_pm_connection_closed(struct mptcp_sock *msk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ bool mptcp_pm_addr_families_match(const struct sock *sk, void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); -void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); - mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + mptcp_pm_fully_established(owner, child); ctx->pm_notified = 1; } } else if (ctx->mp_join) { -- 2.38.1
After the previous commits the PM worker can't race anymore with the unaccepted subflow close and disposal, as the msk keeps a reference to such subflow. We can remove the now irrelevant and confusing checks explicitly preventing the mentioned race. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/options.c | 7 +------ net/mptcp/subflow.c | 7 +++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, mptcp_subflow_fully_established(subflow, mp_opt); check_notify: - /* if the subflow is not already linked into the conn_list, we can't - * notify the PM: this subflow is still on the listener queue - * and the PM possibly acquiring the subflow lock could race with - * the listener close - */ - if (likely(subflow->pm_notified) || list_empty(&subflow->node)) + if (likely(subflow->pm_notified)) return true; subflow->pm_notified = 1; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ static void subflow_ulp_release(struct sock *ssk) sk = ctx->conn; if (sk) { - /* if the msk has been orphaned, keep the ctx - * alive, will be freed by __mptcp_close_ssk(), - * when the subflow is still unaccepted + /* if the subflow has been closed by the TCP stack, keep + * the ctx alive, will be freed by __mptcp_close_ssk() */ - release = ctx->disposable || list_empty(&ctx->node); + release = ctx->disposable; sock_put(sk); } -- 2.38.1
In a few spots the mptcp code invokes the __mptcp_nmpc_socket() helper multiple times under the same socket lock scope. Additionally, in such places, the socket status ensures that there is no MP capable handshake running. Under the above conditions we can replace the later __mptcp_nmpc_socket() helper invocations with direct access to the msk->subflow pointer, and better document that such access is not supposed to fail, via WARN(). Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, struct socket *listener; struct sock *newsk; - listener = __mptcp_nmpc_socket(msk); + listener = msk->subflow; if (WARN_ON_ONCE(!listener)) { *err = -EINVAL; return NULL; @@ -XXX,XX +XXX,XX @@ static int mptcp_get_port(struct sock *sk, unsigned short snum) struct mptcp_sock *msk = mptcp_sk(sk); struct socket *ssock; - ssock = __mptcp_nmpc_socket(msk); + ssock = msk->subflow; pr_debug("msk=%p, subflow=%p", msk, ssock); if (WARN_ON_ONCE(!ssock)) return -EINVAL; @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, pr_debug("msk=%p", msk); - ssock = __mptcp_nmpc_socket(msk); + /* buggy applications can call accept on socket states other than LISTEN + * but no need to allocate the first subflow just to error out. + */ + ssock = msk->subflow; if (!ssock) return -EINVAL; -- 2.38.1
So that we can avoid a bunch of checks in the fastpath. Additionally we can specialize such checks according to the specific fastopen method - defer_connect vs MSG_FASTOPEN. The latter bits will simplify the next patches. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void mptcp_set_nospace(struct sock *sk) static int mptcp_disconnect(struct sock *sk, int flags); -static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, +static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, size_t len, int *copied_syn) { unsigned int saved_flags = msg->msg_flags; struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk; int ret; + /* on flags based fastopen the mptcp is supposed to create the + * first subflow right now. Otherwise we are in the defer_connect + * path, and the first subflow must be already present. + * Since the defer_connect flag is cleared after the first succsful + * fastopen attempt, no need to check for additional subflow status. + */ + if (msg->msg_flags & MSG_FASTOPEN && !__mptcp_nmpc_socket(msk)) + return -EINVAL; + if (!msk->first) + return -EINVAL; + + ssk = msk->first; + lock_sock(ssk); msg->msg_flags |= MSG_DONTWAIT; msk->connect_flags = O_NONBLOCK; @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh } else if (ret && ret != -EINPROGRESS) { mptcp_disconnect(sk, 0); } + inet_sk(sk)->defer_connect = 0; return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); struct page_frag *pfrag; - struct socket *ssock; size_t copied = 0; int ret = 0; long timeo; @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); - ssock = __mptcp_nmpc_socket(msk); - if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect || - msg->msg_flags & MSG_FASTOPEN))) { + if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) { int copied_syn = 0; - ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); + ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn); copied += copied_syn; if (ret == -EINPROGRESS && copied_syn > 0) goto out; -- 2.38.1
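From userspace, the two fastopen methods distinguished above look as follows. This is a sketch with an assumed TFO-enabled peer at 192.0.2.1:443 and fallback defines for constants that may be missing from older headers:

  #include <arpa/inet.h>
  #include <netinet/in.h>
  #include <netinet/tcp.h>
  #include <sys/socket.h>
  #include <unistd.h>

  #ifndef IPPROTO_MPTCP
  #define IPPROTO_MPTCP 262
  #endif
  #ifndef MSG_FASTOPEN
  #define MSG_FASTOPEN 0x20000000
  #endif
  #ifndef TCP_FASTOPEN_CONNECT
  #define TCP_FASTOPEN_CONNECT 30
  #endif

  int main(void)
  {
  	struct sockaddr_in srv = { .sin_family = AF_INET,
  				   .sin_port = htons(443) };
  	int one = 1;
  	int fd;

  	inet_pton(AF_INET, "192.0.2.1", &srv.sin_addr);

  	/* flags based fastopen: no connect() beforehand, so the kernel
  	 * must create the first subflow inside sendmsg() itself
  	 */
  	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
  	sendto(fd, "hi", 2, MSG_FASTOPEN, (struct sockaddr *)&srv, sizeof(srv));
  	close(fd);

  	/* defer_connect path: connect() has already run, so the first
  	 * subflow is expected to exist when send() enters the kernel
  	 */
  	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
  	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));
  	connect(fd, (struct sockaddr *)&srv, sizeof(srv));
  	send(fd, "hi", 2, 0);
  	close(fd);
  	return 0;
  }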
In the long run this will simplify the mptcp code and will allow for more consistent behavior. Move the first subflow allocation out of the sock->init ops into the __mptcp_nmpc_socket() helper. Since the first subflow creation can now happen after the first setsockopt() we additionally need to invoke mptcp_sockopt_sync() on it. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/pm_netlink.c | 4 +-- net/mptcp/protocol.c | 57 +++++++++++++++++++++++++++--------------- net/mptcp/protocol.h | 2 +- net/mptcp/sockopt.c | 18 ++++++------- 4 files changed, 49 insertions(+), 32 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -XXX,XX +XXX,XX @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, return -EINVAL; ssock = __mptcp_nmpc_socket(msk); - if (!ssock) - return -EINVAL; + if (IS_ERR(ssock)) + return PTR_ERR(ssock); mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); #if IS_ENABLED(CONFIG_MPTCP_IPV6) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void __mptcp_check_send_data_fin(struct sock *sk); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; -/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not - * completed yet or has failed, return the subflow socket. - * Otherwise return NULL. - */ -struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) -{ - if (!msk->subflow || READ_ONCE(msk->can_ack)) - return NULL; - - return msk->subflow; -} - /* Returns end sequence number of the receiver's advertised window */ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) { @@ -XXX,XX +XXX,XX @@ static int __mptcp_socket_create(struct mptcp_sock *msk) return 0; } +/* Returns the first subflow socket if available and the MPC + * handshake is not started yet. + */ +struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + int ret; + + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + return ERR_PTR(-EINVAL); + + if (!msk->subflow) { + if (msk->first) + return ERR_PTR(-EINVAL); + + ret = __mptcp_socket_create(msk); + if (ret) + return ERR_PTR(ret); + + mptcp_sockopt_sync(msk, msk->first); + } + + return msk->subflow; +} + static void mptcp_drop(struct sock *sk, struct sk_buff *skb) { sk_drops_add(sk, skb); @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, { unsigned int saved_flags = msg->msg_flags; struct mptcp_sock *msk = mptcp_sk(sk); + struct socket *ssock; struct sock *ssk; int ret; @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, * Since the defer_connect flag is cleared after the first succsful * fastopen attempt, no need to check for additional subflow status. 
*/ - if (msg->msg_flags & MSG_FASTOPEN && !__mptcp_nmpc_socket(msk)) - return -EINVAL; + if (msg->msg_flags & MSG_FASTOPEN) { + ssock = __mptcp_nmpc_socket(msk); + if (IS_ERR(ssock)) + return PTR_ERR(ssock); + } if (!msk->first) return -EINVAL; @@ -XXX,XX +XXX,XX @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) int err = -EINVAL; ssock = __mptcp_nmpc_socket(msk); - if (!ssock) - return -EINVAL; + if (IS_ERR(ssock)) + return PTR_ERR(ssock); mptcp_token_destroy(msk); inet_sk_state_store(sk, TCP_SYN_SENT); @@ -XXX,XX +XXX,XX @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) lock_sock(sock->sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - err = -EINVAL; + if (IS_ERR(ssock)) { + err = PTR_ERR(ssock); goto unlock; } @@ -XXX,XX +XXX,XX @@ static int mptcp_listen(struct socket *sock, int backlog) lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - err = -EINVAL; + if (IS_ERR(ssock)) { + err = PTR_ERR(ssock); goto unlock; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); -struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); +struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_BINDTOIFINDEX: lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, case IPV6_FREEBIND: lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } issk = inet_sk(ssock->sk); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int /* Limit to first subflow, before the connection establishment */ sock = __mptcp_nmpc_socket(msk); - if (!sock) - return -EINVAL; + if (IS_ERR(sock)) + return PTR_ERR(sock); return tcp_setsockopt(sock->sk, level, optname, optval, optlen); } @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int } ssock = __mptcp_nmpc_socket(msk); - if (!ssock) + if (IS_ERR(ssock)) goto out; ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); -- 2.38.1
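The reworked helper reports failures with the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention, which encodes a small negative errno into the pointer value so callers can propagate the exact error instead of a blanket -EINVAL. A self-contained model of the idiom (the real macros live in include/linux/err.h; get_subflow() is a made-up stand-in):

  #include <errno.h>
  #include <stdio.h>

  #define MAX_ERRNO 4095

  static inline void *ERR_PTR(long error) { return (void *)error; }
  static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
  static inline int IS_ERR(const void *ptr)
  {
  	/* the top 4095 addresses are reserved to carry an errno */
  	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
  }

  static void *get_subflow(int fail)
  {
  	static int subflow;	/* stand-in for msk->subflow */

  	return fail ? ERR_PTR(-EINVAL) : &subflow;
  }

  int main(void)
  {
  	void *ssock = get_subflow(1);

  	if (IS_ERR(ssock)) {	/* replaces the old NULL check */
  		printf("error: %ld\n", PTR_ERR(ssock));
  		return 1;
  	}
  	return 0;
  }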
After the previous patch the first subflow is allocated as needed at bind, connect and listen time. We no longer need to keep the first subflow alive after a disconnect just to be able to perform such syscalls. Overall this change makes the passive and active sockets consistent: even passive sockets will be allowed to complete the life cycle after disconnect. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/290 Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, unsigned int flags) { struct mptcp_sock *msk = mptcp_sk(sk); - bool need_push, dispose_it; + bool need_push; - dispose_it = !msk->subflow || ssk != msk->subflow->sk; - if (dispose_it) - list_del(&subflow->node); + list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -XXX,XX +XXX,XX @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, } need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); - if (!dispose_it) { - tcp_disconnect(ssk, 0); - msk->subflow->state = SS_UNCONNECTED; - mptcp_subflow_ctx_reset(subflow); - release_sock(ssk); - - goto out; - } - sock_orphan(ssk); subflow->disposable = 1; @@ -XXX,XX +XXX,XX @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, sock_put(ssk); - if (ssk == msk->first) + if (ssk == msk->first) { msk->first = NULL; + mptcp_dispose_initial_subflow(msk); + } -out: if (ssk == msk->last_snd) msk->last_snd = NULL; @@ -XXX,XX +XXX,XX @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - /* clears msk->subflow, allowing the following to close - * even the initial subflow - */ - mptcp_dispose_initial_subflow(msk); mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } -- 2.38.1
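The disconnect-then-reuse life cycle this patch enables looks, from userspace, like the sketch below. Address and port are illustrative; connecting with AF_UNSPEC is the standard way to disconnect a connected socket:

  #include <arpa/inet.h>
  #include <netinet/in.h>
  #include <sys/socket.h>
  #include <unistd.h>

  #ifndef IPPROTO_MPTCP
  #define IPPROTO_MPTCP 262
  #endif

  int main(void)
  {
  	struct sockaddr_in srv = { .sin_family = AF_INET,
  				   .sin_port = htons(8080) };
  	struct sockaddr unspec = { .sa_family = AF_UNSPEC };
  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

  	inet_pton(AF_INET, "192.0.2.1", &srv.sin_addr);
  	connect(fd, (struct sockaddr *)&srv, sizeof(srv));

  	/* disconnect: the first subflow can now be disposed of entirely */
  	connect(fd, &unspec, sizeof(unspec));

  	/* reuse: a fresh first subflow is allocated on demand at connect() */
  	connect(fd, (struct sockaddr *)&srv, sizeof(srv));
  	close(fd);
  	return 0;
  }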
When cleaning up unaccepted mptcp sockets still lying inside the listener queue at listener close time, such sockets will go through a regular close, waiting for a timeout before shutting down the subflows. There is no need to keep the kernel resources in use for such a possibly long time: short-circuit to fast-close. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 7 +++++-- net/mptcp/subflow.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_check_readable(msk)) { - /* the msk has read data, do the MPTCP equivalent of TCP reset */ + if (mptcp_check_readable(msk) || timeout < 0) { + /* If the msk has read data, or the caller explicitly asks for it, + * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose + */ inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); + timeout = 0; } else if (mptcp_close_state(sk)) { __mptcp_wr_shutdown(sk); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s if (msk->first) sock_hold(msk->first); - do_cancel_work = __mptcp_close(sk, 0); + do_cancel_work = __mptcp_close(sk, -1); release_sock(sk); if (do_cancel_work) { /* lockdep will report a false positive ABBA deadlock -- 2.38.1
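For comparison, the plain-TCP counterpart of the fastclose used here is an abortive close via a zero linger timeout; a small sketch:

  #include <sys/socket.h>
  #include <unistd.h>

  static void tcp_abortive_close(int fd)
  {
  	struct linger lin = { .l_onoff = 1, .l_linger = 0 };

  	/* close() now sends a RST instead of walking the FIN sequence,
  	 * releasing the kernel resources immediately - the same effect
  	 * the patch obtains for unaccepted msk sockets via fastclose
  	 */
  	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lin, sizeof(lin));
  	close(fd);
  }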
Rewrite the mptcp socket accept op, partially open-coding inet_accept() instead of indirectly calling it. This way we can avoid acquiring the new socket lock twice and we can avoid a couple of indirect calls. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct socket *ssock; + struct sock *newsk; int err; pr_debug("msk=%p", msk); @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssock) return -EINVAL; - err = ssock->ops->accept(sock, newsock, flags, kern); - if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk = mptcp_sk(newsock->sk); + newsk = mptcp_accept(sock->sk, flags, &err, kern); + if (!newsk) + return err; + + lock_sock(newsk); + + sock_rps_record_flow(newsk); + WARN_ON(!((1 << newsk->sk_state) & + (TCPF_ESTABLISHED | TCPF_SYN_RECV | + TCPF_CLOSE_WAIT | TCPF_CLOSE))); + + sock_graft(newsk, newsock); + + newsock->state = SS_CONNECTED; + if (!mptcp_is_tcpsk(newsock->sk)) { + struct mptcp_sock *msk = mptcp_sk(newsk); struct mptcp_subflow_context *subflow; - struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); - lock_sock(newsk); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } - release_sock(newsk); } + release_sock(newsk); - return err; + return 0; } static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) -- 2.38.1
This is the refactor needed for the selinux fixes, as discussed on the ML. The first 2 patches address old, currently not so relevant bugs which will become more serious once the refactor is applied. Patches 3-7 are pre-reqs for the bulk changes, but IMHO also nice to have even stand-alone. The main change, introduced by patch 8, consists in moving the first subflow initialization from the msk init callback into the mptcp syscalls needing such data (namely: bind, listen, connect). Patches 9, 10, 11 are not strictly needed, but are nice-to-have follow-ups cleaning up the related code. Specifically, patch 10 closes issues/290. Finally, patches 12 and 13 address the LSM issue. They really target the LSM subtree, and are added here just to allow verifying the fix in our tree before the LSM submission. Sharing after little testing to get feedback and to let the bot massage the new code: a couple of patches can have subtle effects, so I would like to have syzkaller digest them for a while. Paolo Abeni (13): mptcp: fix locking for setsockopt corner-case mptcp: fix locking for in-kernel listener creation. mptcp: refactor passive socket initialization. mptcp: drop unneeded argument mptcp: drop legacy code. mptcp: avoid unneeded __mptcp_nmpc_socket() usage mptcp: move fastopen subflow check inside mptcp_sendmsg_fastopen() mptcp: move first subflow allocation at mpc access time mptcp: do not keep around the first subflow after disconnect. mptcp: fastclose msk when cleaning unaccepted sockets mptcp: refactor mptcp_stream_accept() security, lsm: Introduce security_mptcp_add_subflow() selinux: Implement mptcp_add_subflow hook include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 9 ++ include/linux/security.h | 6 ++ net/mptcp/options.c | 9 +- net/mptcp/pm.c | 4 +- net/mptcp/pm_netlink.c | 14 +-- net/mptcp/protocol.c | 163 ++++++++++++++++++---------------- net/mptcp/protocol.h | 4 +- net/mptcp/sockopt.c | 29 +++--- net/mptcp/subflow.c | 48 +++++++--- security/security.c | 5 ++ security/selinux/hooks.c | 16 ++++ security/selinux/netlabel.c | 8 +- 13 files changed, 198 insertions(+), 118 deletions(-) -- 2.39.0
We need to call __mptcp_nmpc_socket(), and perform the later subflow socket access, under the msk socket lock, or e.g. a racing connect() could change the socket status under the hood, with unexpected results. Fixes: 54635bd04701 ("mptcp: add TCP_FASTOPEN_CONNECT socket option") Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- this and the next patch are new in v2. In both cases the addressed issue is almost irrelevant until patch 8/12, but it will cause a lockdep splat after such change --- net/mptcp/sockopt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, sockptr_t optval, unsigned int optlen) { + struct sock *sk = (struct sock *)msk; struct socket *sock; + int ret = -EINVAL; /* Limit to first subflow, before the connection establishment */ + lock_sock(sk); sock = __mptcp_nmpc_socket(msk); if (!sock) - return -EINVAL; + goto unlock; - return tcp_setsockopt(sock->sk, level, optname, optval, optlen); + ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen); + +unlock: + release_sock(sk); + return ret; } static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, -- 2.39.0
For consistency, in mptcp_pm_nl_create_listen_socket(), we need to call __mptcp_nmpc_socket() under the msk socket lock. Note that, as a side effect, mptcp_subflow_create_socket() needs a 'nested' lockdep annotation, as it will acquire the subflow (kernel) socket lock under the in-kernel listener msk socket lock. The current lack of locking is almost harmless, because the relevant socket is not exposed to user space, but in the future we will add more complexity to the mentioned helper; let's play it safe. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/pm_netlink.c | 10 ++++++---- net/mptcp/subflow.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -XXX,XX +XXX,XX @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, { int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; - struct mptcp_sock *msk; struct socket *ssock; + struct sock *newsk; int backlog = 1024; int err; @@ -XXX,XX +XXX,XX @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; - msk = mptcp_sk(entry->lsk->sk); - if (!msk) + newsk = entry->lsk->sk; + if (!newsk) return -EINVAL; - ssock = __mptcp_nmpc_socket(msk); + lock_sock(newsk); + ssock = __mptcp_nmpc_socket(mptcp_sk(newsk)); + release_sock(newsk); if (!ssock) return -EINVAL; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, if (err) return err; - lock_sock(sf->sk); + lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING); /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); -- 2.39.0
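The 'nested' annotation matters because both sockets share the same lockdep class; without it, lockdep would flag the acquisition as a self-deadlock. A kernel-style sketch of the resulting lock nesting (not compilable standalone; msk_sk/ssk are illustrative names):

  lock_sock(msk_sk);		/* in-kernel listener msk socket */

  /* same lock class as msk_sk, hence the explicit nesting level */
  lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
  /* ... set up the newly created subflow socket ... */
  release_sock(ssk);

  release_sock(msk_sk);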
After commit 30e51b923e43 ("mptcp: fix unreleased socket in accept queue"), unaccepted msk sockets go through a complete shutdown, so we no longer need to delay inserting the first subflow into the subflow list. The reference counting deserves some extra care, as __mptcp_close() is unaware of the request socket linkage to the first subflow. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- Notes: - this schema assumes that the TCP code will never drop a request socket from the receive queue after the 3whs. I tried to verify such assumption as strictly as I could, but more eyes are more than welcome! - this will cause a packetdrill failure for close_before_accept.pkt, because the msk will become fully established before accept() - imho a good thing - and send out add_addr earlier. The packetdrill test change should be easy. --- net/mptcp/protocol.c | 17 ----------------- net/mptcp/subflow.c | 31 ++++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); - mptcp_propagate_sndbuf((struct sock *)msk, ssk); mptcp_sockopt_sync_locked(msk, ssk); return true; } @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, lock_sock(newsk); - /* PM/worker can now acquire the first subflow socket - * lock without racing with listener queue cleanup, - * we can notify it, if needed. - * - * Even if remote has reset the initial subflow by now - * the refcnt is still at least one. - */ - subflow = mptcp_subflow_ctx(msk->first); - list_add(&subflow->node, &msk->conn_list); - sock_hold(msk->first); - if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); - - mptcp_rcv_space_init(msk, msk->first); - mptcp_propagate_sndbuf(newsk, msk->first); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack.
*/ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; + struct mptcp_sock *owner; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { + owner = mptcp_sk(new_msk); + /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list */ - WRITE_ONCE(mptcp_sk(new_msk)->first, child); + WRITE_ONCE(owner->first, child); /* new mpc subflow takes ownership of the newly * created mptcp socket */ mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; - mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); - mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); + mptcp_pm_new_connection(owner, child, 1); + mptcp_token_accept(subflow_req, owner); ctx->conn = new_msk; new_msk = NULL; @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * uses the correct data */ mptcp_copy_inaddrs(ctx->conn, child); + mptcp_propagate_sndbuf(ctx->conn, child); + + mptcp_rcv_space_init(owner, child); + list_add(&ctx->node, &owner->conn_list); + sock_hold(child); /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); + mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + ctx->pm_notified = 1; + } } else if (ctx->mp_join) { - struct mptcp_sock *owner; - owner = subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s sock_hold(sk); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; - msk->first = NULL; msk->dl_next = NULL; + /* The upcoming mptcp_close is going to drop all the references + * to the first subflow, ignoring that one of such reference is + * owned by the request socket still in the accept queue and that + * later inet_csk_listen_stop will drop it. + * Acquire an extra reference here to avoid an UaF at that point. + */ + if (msk->first) + sock_hold(msk->first); + do_cancel_work = __mptcp_close(sk, 0); release_sock(sk); if (do_cancel_work) { -- 2.39.0
After the previous commit every mptcp_pm_fully_established() is always invoked with a GFP_ATOMIC argument. We can drop it. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/options.c | 2 +- net/mptcp/pm.c | 4 ++-- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk); } else { - mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); + mptcp_pm_fully_established(msk, ssk); } return true; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -XXX,XX +XXX,XX @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, return true; } -void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) { struct mptcp_pm_data *pm = &msk->pm; bool announce = false; @@ -XXX,XX +XXX,XX @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, spin_unlock_bh(&pm->lock); if (announce) - mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); + mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC); } void mptcp_pm_connection_closed(struct mptcp_sock *msk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ bool mptcp_pm_addr_families_match(const struct sock *sk, void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); -void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); - mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + mptcp_pm_fully_established(owner, child); ctx->pm_notified = 1; } } else if (ctx->mp_join) { -- 2.39.0
After the previous commits the PM worker can't race anymore with the unaccepted subflow close and disposal, as the msk keeps a reference to such subflow. We can remove the now irrelevant and confusing checks explicitly preventing the mentioned race. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/options.c | 7 +------ net/mptcp/subflow.c | 7 +++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, mptcp_subflow_fully_established(subflow, mp_opt); check_notify: - /* if the subflow is not already linked into the conn_list, we can't - * notify the PM: this subflow is still on the listener queue - * and the PM possibly acquiring the subflow lock could race with - * the listener close - */ - if (likely(subflow->pm_notified) || list_empty(&subflow->node)) + if (likely(subflow->pm_notified)) return true; subflow->pm_notified = 1; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ static void subflow_ulp_release(struct sock *ssk) sk = ctx->conn; if (sk) { - /* if the msk has been orphaned, keep the ctx - * alive, will be freed by __mptcp_close_ssk(), - * when the subflow is still unaccepted + /* if the subflow has been closed by the TCP stack, keep + * the ctx alive, will be freed by __mptcp_close_ssk() */ - release = ctx->disposable || list_empty(&ctx->node); + release = ctx->disposable; sock_put(sk); } -- 2.39.0
In a few spots the mptcp code invokes the __mptcp_nmpc_socket() helper multiple times under the same socket lock scope. Additionally, in such places, the socket status ensures that there is no MP capable handshake running. Under the above conditions we can replace the later __mptcp_nmpc_socket() helper invocations with direct access to the msk->subflow pointer, and better document that such access is not supposed to fail, via WARN(). Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, struct socket *listener; struct sock *newsk; - listener = __mptcp_nmpc_socket(msk); + listener = msk->subflow; if (WARN_ON_ONCE(!listener)) { *err = -EINVAL; return NULL; @@ -XXX,XX +XXX,XX @@ static int mptcp_get_port(struct sock *sk, unsigned short snum) struct mptcp_sock *msk = mptcp_sk(sk); struct socket *ssock; - ssock = __mptcp_nmpc_socket(msk); + ssock = msk->subflow; pr_debug("msk=%p, subflow=%p", msk, ssock); if (WARN_ON_ONCE(!ssock)) return -EINVAL; @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, pr_debug("msk=%p", msk); - ssock = __mptcp_nmpc_socket(msk); + /* buggy applications can call accept on socket states other than LISTEN + * but no need to allocate the first subflow just to error out. + */ + ssock = msk->subflow; if (!ssock) return -EINVAL; -- 2.39.0
So that we can avoid a bunch of checks in the fastpath. Additionally we can specialize such checks according to the specific fastopen method - defer_connect vs MSG_FASTOPEN. The latter bits will simplify the next patches. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void mptcp_set_nospace(struct sock *sk) static int mptcp_disconnect(struct sock *sk, int flags); -static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, +static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, size_t len, int *copied_syn) { unsigned int saved_flags = msg->msg_flags; struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk; int ret; + /* on flags based fastopen the mptcp is supposed to create the + * first subflow right now. Otherwise we are in the defer_connect + * path, and the first subflow must be already present. + * Since the defer_connect flag is cleared after the first succsful + * fastopen attempt, no need to check for additional subflow status. + */ + if (msg->msg_flags & MSG_FASTOPEN && !__mptcp_nmpc_socket(msk)) + return -EINVAL; + if (!msk->first) + return -EINVAL; + + ssk = msk->first; + lock_sock(ssk); msg->msg_flags |= MSG_DONTWAIT; msk->connect_flags = O_NONBLOCK; @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh } else if (ret && ret != -EINPROGRESS) { mptcp_disconnect(sk, 0); } + inet_sk(sk)->defer_connect = 0; return ret; } @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); struct page_frag *pfrag; - struct socket *ssock; size_t copied = 0; int ret = 0; long timeo; @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); - ssock = __mptcp_nmpc_socket(msk); - if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect || - msg->msg_flags & MSG_FASTOPEN))) { + if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) { int copied_syn = 0; - ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); + ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn); copied += copied_syn; if (ret == -EINPROGRESS && copied_syn > 0) goto out; -- 2.39.0
In the long run this will simplify the mptcp code and will allow for more consistent behavior. Move the first subflow allocation out of the sock->init ops into the __mptcp_nmpc_socket() helper. Since the first subflow creation can now happen after the first setsockopt() we additionally need to invoke mptcp_sockopt_sync() on it. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- v1 -> v2: - really remove subflow creation from init() (!!!) --- net/mptcp/pm_netlink.c | 4 +-- net/mptcp/protocol.c | 61 +++++++++++++++++++++++++----------------- net/mptcp/protocol.h | 2 +- net/mptcp/sockopt.c | 20 +++++++------- 4 files changed, 51 insertions(+), 36 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -XXX,XX +XXX,XX @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, lock_sock(newsk); ssock = __mptcp_nmpc_socket(mptcp_sk(newsk)); release_sock(newsk); - if (!ssock) - return -EINVAL; + if (IS_ERR(ssock)) + return PTR_ERR(ssock); mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); #if IS_ENABLED(CONFIG_MPTCP_IPV6) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void __mptcp_check_send_data_fin(struct sock *sk); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; -/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not - * completed yet or has failed, return the subflow socket. - * Otherwise return NULL. - */ -struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) -{ - if (!msk->subflow || READ_ONCE(msk->can_ack)) - return NULL; - - return msk->subflow; -} - /* Returns end sequence number of the receiver's advertised window */ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) { @@ -XXX,XX +XXX,XX @@ static int __mptcp_socket_create(struct mptcp_sock *msk) return 0; } +/* Returns the first subflow socket if available and the MPC + * handshake is not started yet. + */ +struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk) +{ + struct sock *sk = (struct sock *)msk; + int ret; + + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + return ERR_PTR(-EINVAL); + + if (!msk->subflow) { + if (msk->first) + return ERR_PTR(-EINVAL); + + ret = __mptcp_socket_create(msk); + if (ret) + return ERR_PTR(ret); + + mptcp_sockopt_sync(msk, msk->first); + } + + return msk->subflow; +} + static void mptcp_drop(struct sock *sk, struct sk_buff *skb) { sk_drops_add(sk, skb); @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, { unsigned int saved_flags = msg->msg_flags; struct mptcp_sock *msk = mptcp_sk(sk); + struct socket *ssock; struct sock *ssk; int ret; @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, * Since the defer_connect flag is cleared after the first succsful * fastopen attempt, no need to check for additional subflow status. 
*/ - if (msg->msg_flags & MSG_FASTOPEN && !__mptcp_nmpc_socket(msk)) - return -EINVAL; + if (msg->msg_flags & MSG_FASTOPEN) { + ssock = __mptcp_nmpc_socket(msk); + if (IS_ERR(ssock)) + return PTR_ERR(ssock); + } if (!msk->first) return -EINVAL; @@ -XXX,XX +XXX,XX @@ static int mptcp_init_sock(struct sock *sk) if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net)) return -ENOMEM; - ret = __mptcp_socket_create(mptcp_sk(sk)); - if (ret) - return ret; - ret = mptcp_init_sched(mptcp_sk(sk), mptcp_sched_find(mptcp_get_scheduler(net))); if (ret) @@ -XXX,XX +XXX,XX @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) int err = -EINVAL; ssock = __mptcp_nmpc_socket(msk); - if (!ssock) - return -EINVAL; + if (IS_ERR(ssock)) + return PTR_ERR(ssock); mptcp_token_destroy(msk); inet_sk_state_store(sk, TCP_SYN_SENT); @@ -XXX,XX +XXX,XX @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) lock_sock(sock->sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - err = -EINVAL; + if (IS_ERR(ssock)) { + err = PTR_ERR(ssock); goto unlock; } @@ -XXX,XX +XXX,XX @@ static int mptcp_listen(struct socket *sock, int backlog) lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - err = -EINVAL; + if (IS_ERR(ssock)) { + err = PTR_ERR(ssock); goto unlock; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); -struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); +struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_BINDTOIFINDEX: lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, case IPV6_FREEBIND: lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } issk = inet_sk(ssock->sk); @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int { struct sock *sk = (struct sock *)msk; struct socket *sock; - int ret = -EINVAL; + int ret; /* Limit to first subflow, before the connection establishment */ lock_sock(sk); sock = __mptcp_nmpc_socket(msk); - if (!sock) + if (IS_ERR(sock)) { + ret = PTR_ERR(sock); goto unlock; + } ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen); @@ -XXX,XX +XXX,XX 
@@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int } ssock = __mptcp_nmpc_socket(msk); - if (!ssock) + if (IS_ERR(ssock)) goto out; ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); -- 2.39.0
After the previous patch the first subflow is allocated as needed at bind, connect and listen time. We no longer need to keep the first subflow alive after a disconnect just to be able to perform such syscalls. Overall this change makes the passive and active sockets consistent: even passive sockets will be allowed to complete the life cycle after disconnect. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/290 Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, unsigned int flags) { struct mptcp_sock *msk = mptcp_sk(sk); - bool need_push, dispose_it; + bool need_push; - dispose_it = !msk->subflow || ssk != msk->subflow->sk; - if (dispose_it) - list_del(&subflow->node); + list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -XXX,XX +XXX,XX @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, } need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); - if (!dispose_it) { - tcp_disconnect(ssk, 0); - msk->subflow->state = SS_UNCONNECTED; - mptcp_subflow_ctx_reset(subflow); - release_sock(ssk); - - goto out; - } - sock_orphan(ssk); subflow->disposable = 1; @@ -XXX,XX +XXX,XX @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, sock_put(ssk); - if (ssk == msk->first) + if (ssk == msk->first) { msk->first = NULL; + mptcp_dispose_initial_subflow(msk); + } -out: if (ssk == msk->last_snd) msk->last_snd = NULL; @@ -XXX,XX +XXX,XX @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - /* clears msk->subflow, allowing the following to close - * even the initial subflow - */ - mptcp_dispose_initial_subflow(msk); mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } -- 2.39.0
When cleaning up unaccepted mptcp sockets still lying inside the listener queue at listener close time, such sockets will go through a regular close, waiting for a timeout before shutting down the subflows. There is no need to keep the kernel resources in use for such a possibly long time: short-circuit to fast-close. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 7 +++++-- net/mptcp/subflow.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_check_readable(msk)) { - /* the msk has read data, do the MPTCP equivalent of TCP reset */ + if (mptcp_check_readable(msk) || timeout < 0) { + /* If the msk has read data, or the caller explicitly asks for it, + * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose + */ inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); + timeout = 0; } else if (mptcp_close_state(sk)) { __mptcp_wr_shutdown(sk); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s if (msk->first) sock_hold(msk->first); - do_cancel_work = __mptcp_close(sk, 0); + do_cancel_work = __mptcp_close(sk, -1); release_sock(sk); if (do_cancel_work) { /* lockdep will report a false positive ABBA deadlock -- 2.39.0
Rewrite the mptcp socket accept op, partially open-coding inet_accept() instead of indirectly calling it. This way we can avoid acquiring the new socket lock twice and we can avoid a couple of indirect calls. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/protocol.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct socket *ssock; + struct sock *newsk; int err; pr_debug("msk=%p", msk); @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssock) return -EINVAL; - err = ssock->ops->accept(sock, newsock, flags, kern); - if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk = mptcp_sk(newsock->sk); + newsk = mptcp_accept(sock->sk, flags, &err, kern); + if (!newsk) + return err; + + lock_sock(newsk); + + sock_rps_record_flow(newsk); + WARN_ON(!((1 << newsk->sk_state) & + (TCPF_ESTABLISHED | TCPF_SYN_RECV | + TCPF_CLOSE_WAIT | TCPF_CLOSE))); + + sock_graft(newsk, newsock); + + newsock->state = SS_CONNECTED; + if (!mptcp_is_tcpsk(newsock->sk)) { + struct mptcp_sock *msk = mptcp_sk(newsk); struct mptcp_subflow_context *subflow; - struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); - lock_sock(newsk); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ @@ -XXX,XX +XXX,XX @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } - release_sock(newsk); } + release_sock(newsk); - return err; + return 0; } static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) -- 2.39.0
MPTCP can create subflows in kernel context, and later indirectly
expose them to user-space via the owning mptcp socket.

As discussed in the linked thread, the above causes unexpected
failures for MPTCP-enabled server applications.

Let's introduce a new LSM hook to allow the security module to
relabel the subflow according to the owning process.

Note that the new hook requires both the mptcp socket and the new
subflow. This could allow future extensions, e.g. explicitly
validating the mptcp <-> subflow linkage.

Link: https://lore.kernel.org/mptcp/CAHC9VhTNh-YwiyTds=P1e3rixEDqbRTFj22bpya=+qJqfcaMfg@mail.gmail.com/
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v1 -> v2:
 - fix build issue with !CONFIG_SECURITY_NETWORK
---
 include/linux/lsm_hook_defs.h | 1 +
 include/linux/lsm_hooks.h     | 9 +++++++++
 include/linux/security.h      | 6 ++++++
 net/mptcp/subflow.c           | 6 ++++++
 security/security.c           | 5 +++++
 5 files changed, 27 insertions(+)

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -XXX,XX +XXX,XX @@ LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
 	 struct sock *sk, struct sock *newsk)
 LSM_HOOK(int, 0, sctp_assoc_established, struct sctp_association *asoc,
 	 struct sk_buff *skb)
+LSM_HOOK(int, 0, mptcp_add_subflow, struct sock *sk, struct sock *ssk)
 #endif /* CONFIG_SECURITY_NETWORK */

 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index XXXXXXX..XXXXXXX 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -XXX,XX +XXX,XX @@
 *	@skb pointer to skbuff of association packet.
 *	Return 0 if permission is granted.
 *
+ * Security hooks for MPTCP
+ *
+ * @mptcp_add_subflow
+ *	Update the labeling for the given MPTCP subflow, to match the
+ *	owning MPTCP socket.
+ *	@sk: the owning MPTCP socket
+ *	@ssk: the new subflow
+ *	Return 0 if successful, otherwise < 0 error code.
+ *
 * Security hooks for Infiniband
 *
 * @ib_pkey_access:
diff --git a/include/linux/security.h b/include/linux/security.h
index XXXXXXX..XXXXXXX 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -XXX,XX +XXX,XX @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk);
 int security_sctp_assoc_established(struct sctp_association *asoc,
 				    struct sk_buff *skb);
+int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk);
 #else /* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -XXX,XX +XXX,XX @@ static inline int security_sctp_assoc_established(struct sctp_association *asoc,
 {
 	return 0;
 }
+
+static inline int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
+{
+	return 0;
+}
 #endif /* CONFIG_SECURITY_NETWORK */

 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index XXXXXXX..XXXXXXX 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -XXX,XX +XXX,XX @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,

 	lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);

+	err = security_mptcp_add_subflow(sk, sf->sk);
+	if (err)
+		goto release_ssk;
+
 	/* the newly created socket has to be in the same cgroup as its parent */
 	mptcp_attach_cgroup(sk, sf->sk);

@@ -XXX,XX +XXX,XX @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
 	get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
 	sock_inuse_add(net, 1);
 	err = tcp_set_ulp(sf->sk, "mptcp");
+
+release_ssk:
 	release_sock(sf->sk);

 	if (err) {
diff --git a/security/security.c b/security/security.c
index XXXXXXX..XXXXXXX 100644
--- a/security/security.c
+++ b/security/security.c
@@ -XXX,XX +XXX,XX @@ int security_sctp_assoc_established(struct sctp_association *asoc,
 }
 EXPORT_SYMBOL(security_sctp_assoc_established);

+int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
+{
+	return call_int_hook(mptcp_add_subflow, 0, sk, ssk);
+}
+
 #endif /* CONFIG_SECURITY_NETWORK */

 #ifdef CONFIG_SECURITY_INFINIBAND
--
2.39.0
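For clarity, this is roughly how a security module is expected to
consume the new hook; a minimal sketch, where everything prefixed with
example_ is hypothetical and only the mptcp_add_subflow hook name,
LSM_HOOK_INIT() and security_add_hooks() come from the patch above or
the existing LSM infrastructure:

	static int example_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
	{
		/* sk is the owning MPTCP socket, ssk the in-kernel
		 * created subflow; propagate the module's per-socket
		 * security state from sk to ssk here. Returning < 0
		 * makes the subflow creation fail.
		 */
		return 0;
	}

	static struct security_hook_list example_hooks[] __lsm_ro_after_init = {
		LSM_HOOK_INIT(mptcp_add_subflow, example_mptcp_add_subflow),
	};

	static int __init example_lsm_init(void)
	{
		security_add_hooks(example_hooks, ARRAY_SIZE(example_hooks),
				   "example");
		return 0;
	}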
Newly added subflows should inherit the LSM label from the associated
msk socket, regardless of the current context.

This patch implements the above, copying the sid and class from the
msk context, deleting the existing subflow label, if any, and then
re-creating a new one.

The new helper reuses the selinux_netlbl_sk_security_free() function,
and the latter can end up being called multiple times with the same
argument; we additionally need to make it idempotent.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v1 -> v2:
 - cleanup selinux_netlbl_sk_security_free() (Paul)
 - inherit label from msk (Paul)

v1 -> v2:
 - fix build issue with !CONFIG_NETLABEL
---
 security/selinux/hooks.c    | 16 ++++++++++++++++
 security/selinux/netlabel.c |  8 ++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index XXXXXXX..XXXXXXX 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -XXX,XX +XXX,XX @@ static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk
 	selinux_netlbl_sctp_sk_clone(sk, newsk);
 }

+static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
+{
+	struct sk_security_struct *ssksec = ssk->sk_security;
+	struct sk_security_struct *sksec = sk->sk_security;
+
+	ssksec->sclass = sksec->sclass;
+	ssksec->sid = sksec->sid;
+
+	/* replace the existing subflow label deleting the existing one
+	 * and re-creating a new label using the current context
+	 */
+	selinux_netlbl_sk_security_free(ssksec);
+	return selinux_netlbl_socket_post_create(ssk, ssk->sk_family);
+}
+
 static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb,
 				     struct request_sock *req)
 {
@@ -XXX,XX +XXX,XX @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone),
 	LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect),
 	LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established),
+	LSM_HOOK_INIT(mptcp_add_subflow, selinux_mptcp_add_subflow),
 	LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request),
 	LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone),
 	LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established),
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index XXXXXXX..XXXXXXX 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -XXX,XX +XXX,XX @@ void selinux_netlbl_err(struct sk_buff *skb, u16 family, int error, int gateway)
 */
 void selinux_netlbl_sk_security_free(struct sk_security_struct *sksec)
 {
-	if (sksec->nlbl_secattr != NULL)
-		netlbl_secattr_free(sksec->nlbl_secattr);
+	if (!sksec->nlbl_secattr)
+		return;
+
+	netlbl_secattr_free(sksec->nlbl_secattr);
+	sksec->nlbl_secattr = NULL;
+	sksec->nlbl_state = NLBL_UNSET;
 }

 /**
--
2.39.0
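The idempotency requirement comes from the new call site: with the
hook above, selinux_netlbl_sk_security_free() can run once from
selinux_mptcp_add_subflow() and again when the subflow's sock security
is released. A sketch of the resulting behavior (the comments describe
the rewritten helper shown in the diff):

	selinux_netlbl_sk_security_free(ssksec);	/* frees nlbl_secattr, sets it
							 * to NULL, state to NLBL_UNSET
							 */
	selinux_netlbl_sk_security_free(ssksec);	/* nlbl_secattr is now NULL:
							 * returns early, no double free
							 */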