From nobody Mon Apr 29 01:54:52 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 40509200BC; Wed, 31 May 2023 19:37:17 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id ADBF2C4339E; Wed, 31 May 2023 19:37:16 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685561837; bh=4WKY0Pr/dAihpTUtgtlYI/RMZE9/4UvbyGFh0IkjtPw=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=ixEsKQZ8xkeS2MoXm5lndgA1dyFJOfDAEHlcCUS6BvFwpWmZfSBdbRgoJzBx//mdA xTWrNm9OtTnTqjcsm3sMiNtHm7LmR3uwMYjCGDy5uzNTtujRPx6caUFcSMpKeSqMKT qEjpS2Fz4fm3oPaF80H0wkjqLbmWncdr1JuGz6yDTxPRSRWHJ5W4DA9bkJDW+ZdhS1 RPKBbROm7WZW0CQIIiXrNveniReHgCJtw8/14Tde9ZPx1yd9BUh/anv6kDJ2mi4+/L r2TvjflAX84ERlDxdJ6mqNJKZhhoNar3axlhOSRKLIULLDxut2kmWmsjAvLV5FtRes mpQG1Ev4C7ywQ== From: Mat Martineau Date: Wed, 31 May 2023 12:37:03 -0700 Subject: [PATCH net 1/6] mptcp: fix connect timeout handling Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20230531-send-net-20230531-v1-1-47750c420571@kernel.org> References: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> In-Reply-To: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , Geliang Tang Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Mat Martineau , Ondrej Mosnacek , stable@vger.kernel.org X-Mailer: b4 0.12.2 From: Paolo Abeni Ondrej reported a functional issue WRT timeout handling on connect with a nice reproducer. The problem is that the current mptcp connect waits for both the MPTCP socket level timeout, and the first subflow socket timeout. 
The latter is not influenced/touched by the exposed setsockopt(). Overall the above makes the SO_SNDTIMEO a no-op on connect. Since mptcp_connect is invoked via inet_stream_connect and the latter properly handles the MPTCP level timeout, we can address the issue by making the nested subflow level connect always non-blocking. This also allows simplifying the code a bit, dropping an ugly hack to handle the fastopen and custom proto_ops connect. The issue predates the blamed commit below, but the current resolution requires the infrastructure introduced there. Fixes: 54f1944ed6d2 ("mptcp: factor out mptcp_connect()") Reported-by: Ondrej Mosnacek Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/399 Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 29 +++++++---------------------- net/mptcp/protocol.h | 1 - 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 08dc53f56bc2..9cafd3b89908 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1702,7 +1702,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, st= ruct msghdr *msg, =20 lock_sock(ssk); msg->msg_flags |=3D MSG_DONTWAIT; - msk->connect_flags =3D O_NONBLOCK; msk->fastopening =3D 1; ret =3D tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); msk->fastopening =3D 0; @@ -3617,9 +3616,9 @@ static int mptcp_connect(struct sock *sk, struct sock= addr *uaddr, int addr_len) * acquired the subflow socket lock, too. 
*/ if (msk->fastopening) - err =3D __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags= , 1); + err =3D __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1); else - err =3D inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags); + err =3D inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK); inet_sk(sk)->defer_connect =3D inet_sk(ssock->sk)->defer_connect; =20 /* on successful connect, the msk state will be moved to established by @@ -3632,12 +3631,10 @@ static int mptcp_connect(struct sock *sk, struct so= ckaddr *uaddr, int addr_len) =20 mptcp_copy_inaddrs(sk, ssock->sk); =20 - /* unblocking connect, mptcp-level inet_stream_connect will error out - * without changing the socket state, update it here. + /* silence EINPROGRESS and let the caller inet_stream_connect + * handle the connection in progress */ - if (err =3D=3D -EINPROGRESS) - sk->sk_socket->state =3D ssock->state; - return err; + return 0; } =20 static struct proto mptcp_prot =3D { @@ -3696,18 +3693,6 @@ static int mptcp_bind(struct socket *sock, struct so= ckaddr *uaddr, int addr_len) return err; } =20 -static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uadd= r, - int addr_len, int flags) -{ - int ret; - - lock_sock(sock->sk); - mptcp_sk(sock->sk)->connect_flags =3D flags; - ret =3D __inet_stream_connect(sock, uaddr, addr_len, flags, 0); - release_sock(sock->sk); - return ret; -} - static int mptcp_listen(struct socket *sock, int backlog) { struct mptcp_sock *msk =3D mptcp_sk(sock->sk); @@ -3859,7 +3844,7 @@ static const struct proto_ops mptcp_stream_ops =3D { .owner =3D THIS_MODULE, .release =3D inet_release, .bind =3D mptcp_bind, - .connect =3D mptcp_stream_connect, + .connect =3D inet_stream_connect, .socketpair =3D sock_no_socketpair, .accept =3D mptcp_stream_accept, .getname =3D inet_getname, @@ -3954,7 +3939,7 @@ static const struct proto_ops mptcp_v6_stream_ops =3D= { .owner =3D THIS_MODULE, .release =3D inet6_release, .bind =3D mptcp_bind, - 
.connect =3D mptcp_stream_connect, + .connect =3D inet_stream_connect, .socketpair =3D sock_no_socketpair, .accept =3D mptcp_stream_accept, .getname =3D inet6_getname, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 2d7b2c80a164..de4667dafe59 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -297,7 +297,6 @@ struct mptcp_sock { nodelay:1, fastopening:1, in_accept_queue:1; - int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; --=20 2.40.1 From nobody Mon Apr 29 01:54:52 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DE575200D6; Wed, 31 May 2023 19:37:17 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1A964C433A1; Wed, 31 May 2023 19:37:17 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685561837; bh=RFv40zM9QJYJOLASIioGQHhhSNdv+fGArl5JAYjK2I0=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=Cv/MUpCEWmpI8YuBDior4T9GWiqMgMUeQasZLXhPt+Ei6NCzNGJNfxAeqXYvc/MS5 UhhVhVdIqHSduKzDPQIum5lUWjwP2+PD5LIlUnaMM880CWePi0oGuhj3fYeTJdjw66 1SH2XR8lHaAzkUve1XN+/X1QfwNCyMj2DN13sMaJgZGxQ706OyvtbT4R6PkuVqdQNh RjXtzKL8UTbp/Iz6i8uF7Ni9n1OiVYucBmFywRh18W5BD5btFWI+a9D7yrXnoxMeh3 zP0v1NZ1NPs/QEtT1+8+uzqXzxfF3pkapy3OFKU9ZvyWTOfMSfMppO7qz6UvnRXiaa QcnDzK2WHGC/w== From: Mat Martineau Date: Wed, 31 May 2023 12:37:04 -0700 Subject: [PATCH net 2/6] mptcp: add annotations around msk->subflow accesses Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20230531-send-net-20230531-v1-2-47750c420571@kernel.org> References: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> 
In-Reply-To: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , Geliang Tang Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Mat Martineau , Christoph Paasch X-Mailer: b4 0.12.2 From: Paolo Abeni MPTCP can access the first subflow socket in a few spots outside the socket lock scope. That is actually safe, as MPTCP will delete the socket itself only after the msk sock close(). Still, such accesses cause a few KCSAN splats, as reported by Christoph. Silence the harmless warnings by adding a few annotations around the relevant accesses. Fixes: 71ba088ce0aa ("mptcp: cleanup accept and poll") Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/402 Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 18 ++++++++++-------- net/mptcp/protocol.h | 6 +++++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9cafd3b89908..ce9de2c946b0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -91,7 +91,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) return err; =20 msk->first =3D ssock->sk; - msk->subflow =3D ssock; + WRITE_ONCE(msk->subflow, ssock); subflow =3D mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); @@ -2282,7 +2282,7 @@ static void mptcp_dispose_initial_subflow(struct mptc= p_sock *msk) { if (msk->subflow) { iput(SOCK_INODE(msk->subflow)); - msk->subflow =3D NULL; + WRITE_ONCE(msk->subflow, NULL); } } =20 @@ -3136,7 +3136,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk =3D mptcp_sk(nsk); msk->local_key =3D subflow_req->local_key; msk->token =3D subflow_req->token; - msk->subflow =3D NULL; + WRITE_ONCE(msk->subflow, NULL); msk->in_accept_queue =3D 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & 
OPTION_MPTCP_CSUMREQD) @@ -3184,7 +3184,7 @@ static struct sock *mptcp_accept(struct sock *sk, int= flags, int *err, struct socket *listener; struct sock *newsk; =20 - listener =3D msk->subflow; + listener =3D READ_ONCE(msk->subflow); if (WARN_ON_ONCE(!listener)) { *err =3D -EINVAL; return NULL; @@ -3736,10 +3736,10 @@ static int mptcp_stream_accept(struct socket *sock,= struct socket *newsock, =20 pr_debug("msk=3D%p", msk); =20 - /* buggy applications can call accept on socket states other then LISTEN + /* Buggy applications can call accept on socket states other then LISTEN * but no need to allocate the first subflow just to error out. */ - ssock =3D msk->subflow; + ssock =3D READ_ONCE(msk->subflow); if (!ssock) return -EINVAL; =20 @@ -3813,10 +3813,12 @@ static __poll_t mptcp_poll(struct file *file, struc= t socket *sock, state =3D inet_sk_state_load(sk); pr_debug("msk=3D%p state=3D%d flags=3D%lx", msk, state, msk->flags); if (state =3D=3D TCP_LISTEN) { - if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk)) + struct socket *ssock =3D READ_ONCE(msk->subflow); + + if (WARN_ON_ONCE(!ssock || !ssock->sk)) return 0; =20 - return inet_csk_listen_poll(msk->subflow->sk); + return inet_csk_listen_poll(ssock->sk); } =20 if (state !=3D TCP_SYN_SENT && state !=3D TCP_SYN_RECV) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index de4667dafe59..7a1a3c35470f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -305,7 +305,11 @@ struct mptcp_sock { struct list_head rtx_queue; struct mptcp_data_frag *first_pending; struct list_head join_list; - struct socket *subflow; /* outgoing connect/listener/!mp_capable */ + struct socket *subflow; /* outgoing connect/listener/!mp_capable + * The mptcp ops can safely dereference, using suitable + * ONCE annotation, the subflow outside the socket + * lock as such sock is freed after close(). 
+ */ struct sock *first; struct mptcp_pm_data pm; struct { --=20 2.40.1 From nobody Mon Apr 29 01:54:52 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 659AC21081; Wed, 31 May 2023 19:37:17 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 77053C4339B; Wed, 31 May 2023 19:37:17 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685561837; bh=imyn8zDjERXaGf96kmA9Gc/k7y1YpLM9G8xtFvoKTUc=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=tjfTPsYFfm5QRcQtJe0vXInklXz2FbXn0pxa9r7PyGs6t3J7WJZRBhShxUc9d8BL3 yOsDqZ2j7qzAX6O8EQKJkEZS3yD/CsSTDNga0Aykg08HsuSbV5z9uBcy3XOjhFAPP6 tgtUZVS7oGwIeEyKU1Tkfz9h/FurA5i8biw4mT1I7y9Vh7LKrFYFcJsW7R5jTGHQMP 9tNYvyzi88+Bc8a+vbCmwwOn2Tq7PFweJ25QIQaMoRsYyC5lf+QdpJynBFrwq4WQgS E3sWbvIGCBmk9Jp9gWCWc/9jEUIbdNNfGQP7YcTgTB1Cf0QLre9AnkC3DytiXOzdQ2 edUFR4K4OrEIw== From: Mat Martineau Date: Wed, 31 May 2023 12:37:05 -0700 Subject: [PATCH net 3/6] mptcp: consolidate passive msk socket initialization Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20230531-send-net-20230531-v1-3-47750c420571@kernel.org> References: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> In-Reply-To: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , Geliang Tang Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Mat Martineau X-Mailer: b4 0.12.2 From: Paolo Abeni When the msk socket is cloned at MPC handshake time, a few fields are initialized in a racy way outside mptcp_sk_clone() and the msk socket lock. 
The above is due to historical reasons: before commit a88d0092b24b ("mptcp: simplify subflow_syn_recv_sock()") the first subflow socket carrying all the needed data was not available yet at msk creation time. We can now refactor the code, moving the missing initialization bits under the socket lock, removing the init race and avoiding some code duplication. This will also simplify the next patch, as all msk->first write accesses are now under the msk socket lock. Fixes: 0397c6d85f9c ("mptcp: keep unaccepted MPC subflow into join list") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 35 ++++++++++++++++++++++++++++------- net/mptcp/protocol.h | 8 ++++---- net/mptcp/subflow.c | 28 +--------------------------- 3 files changed, 33 insertions(+), 38 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ce9de2c946b0..2ecd0117ab1b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3038,7 +3038,7 @@ static void mptcp_close(struct sock *sk, long timeout) sock_put(sk); } =20 -void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) +static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) const struct ipv6_pinfo *ssk6 =3D inet6_sk(ssk); @@ -3115,9 +3115,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struc= t sock *sk) } #endif =20 -struct sock *mptcp_sk_clone(const struct sock *sk, - const struct mptcp_options_received *mp_opt, - struct request_sock *req) +struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, + struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req =3D mptcp_subflow_rsk(req); struct sock *nsk =3D sk_clone_lock(sk, GFP_ATOMIC); @@ -3149,10 +3150,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->setsockopt_seq =3D mptcp_sk(sk)->setsockopt_seq; =20 sock_reset_flag(nsk, SOCK_RCU_FREE); - /* will be fully 
established after successful MPC subflow creation */ - inet_sk_state_store(nsk, TCP_SYN_RECV); - security_inet_csk_clone(nsk, req); + + /* this can't race with mptcp_close(), as the msk is + * not yet exposted to user-space + */ + inet_sk_state_store(nsk, TCP_ESTABLISHED); + + /* The msk maintain a ref to each subflow in the connections list */ + WRITE_ONCE(msk->first, ssk); + list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list); + sock_hold(ssk); + + /* new mpc subflow takes ownership of the newly + * created mptcp socket + */ + mptcp_token_accept(subflow_req, msk); + + /* set msk addresses early to ensure mptcp_pm_get_local_id() + * uses the correct data + */ + mptcp_copy_inaddrs(nsk, ssk); + mptcp_propagate_sndbuf(nsk, ssk); + + mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(nsk); =20 /* note: the newly allocated socket refcount is 2 now */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7a1a3c35470f..c5255258bfb3 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -616,7 +616,6 @@ int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); int mptcp_get_pm_type(const struct net *net); -void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); @@ -686,9 +685,10 @@ void __init mptcp_proto_init(void); int __init mptcp_proto_v6_init(void); #endif =20 -struct sock *mptcp_sk_clone(const struct sock *sk, - const struct mptcp_options_received *mp_opt, - struct request_sock *req); +struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, + struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); =20 diff --git a/net/mptcp/subflow.c 
b/net/mptcp/subflow.c index ba065b66551a..4688daa6b38b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -815,38 +815,12 @@ static struct sock *subflow_syn_recv_sock(const struc= t sock *sk, ctx->setsockopt_seq =3D listener->setsockopt_seq; =20 if (ctx->mp_capable) { - ctx->conn =3D mptcp_sk_clone(listener->conn, &mp_opt, req); + ctx->conn =3D mptcp_sk_clone_init(listener->conn, &mp_opt, child, req); if (!ctx->conn) goto fallback; =20 owner =3D mptcp_sk(ctx->conn); - - /* this can't race with mptcp_close(), as the msk is - * not yet exposted to user-space - */ - inet_sk_state_store(ctx->conn, TCP_ESTABLISHED); - - /* record the newly created socket as the first msk - * subflow, but don't link it yet into conn_list - */ - WRITE_ONCE(owner->first, child); - - /* new mpc subflow takes ownership of the newly - * created mptcp socket - */ - owner->setsockopt_seq =3D ctx->setsockopt_seq; mptcp_pm_new_connection(owner, child, 1); - mptcp_token_accept(subflow_req, owner); - - /* set msk addresses early to ensure mptcp_pm_get_local_id() - * uses the correct data - */ - mptcp_copy_inaddrs(ctx->conn, child); - mptcp_propagate_sndbuf(ctx->conn, child); - - mptcp_rcv_space_init(owner, child); - list_add(&ctx->node, &owner->conn_list); - sock_hold(child); =20 /* with OoO packets we can reach here without ingress * mpc option --=20 2.40.1 From nobody Mon Apr 29 01:54:52 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 638A82098E; Wed, 31 May 2023 19:37:18 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id CB5B8C433A7; Wed, 31 May 2023 19:37:17 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685561838; bh=Yuc+8vL6g2z70pfEizKVvsSEp4rll6sEz7sy25zJhlA=; 
h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=tmwo2tmWuVdq5iWjNLsnXALC7Njvyy1lQUtig4TFRbfl/r1UiXW8Q+71NanrFVPtS 7ANOa1QU+T4viT1+HMZQLplqQekgyDVEwRe7LSWINJmovft31HY4q7cE/6dP+FQdjY zbLMXwH96KApXpNBXJCruNYtPd+YT6hfZ8uOT63AnAPT36qqppbpR623AaCfMZoR3U cz8xKUfV/Eig6XxryTYOeqD7+tdn7tmdFVzhxQ1Aa0ToU0U9FfL9HpmwTTpxOhdKwe 9jcBnxeiQKTYWOB1v3Ca2Pa5fCVWPWxXNpJNEIOV0+XVG9JadfnFjQp2Y/Og1Yg71h IBNtkswgYASrA== From: Mat Martineau Date: Wed, 31 May 2023 12:37:06 -0700 Subject: [PATCH net 4/6] mptcp: fix data race around msk->first access Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20230531-send-net-20230531-v1-4-47750c420571@kernel.org> References: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> In-Reply-To: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , Geliang Tang Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Mat Martineau X-Mailer: b4 0.12.2 From: Paolo Abeni The first subflow socket is accessed outside the msk socket lock by mptcp_subflow_fail(). We need to annotate each write access with WRITE_ONCE, but a few spots still lack it. 
Fixes: 76a13b315709 ("mptcp: invoke MP_FAIL response when needed") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2ecd0117ab1b..a7dd7d8c9af2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -90,7 +90,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) if (err) return err; =20 - msk->first =3D ssock->sk; + WRITE_ONCE(msk->first, ssock->sk); WRITE_ONCE(msk->subflow, ssock); subflow =3D mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); @@ -2419,7 +2419,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct= sock *ssk, sock_put(ssk); =20 if (ssk =3D=3D msk->first) - msk->first =3D NULL; + WRITE_ONCE(msk->first, NULL); =20 out: if (ssk =3D=3D msk->last_snd) @@ -2720,7 +2720,7 @@ static int __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival =3D TCP_RTO_MIN; =20 - msk->first =3D NULL; + WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss =3D mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); WRITE_ONCE(msk->allow_infinite_fallback, true); --=20 2.40.1 From nobody Mon Apr 29 01:54:52 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9BAFB24EAB; Wed, 31 May 2023 19:37:18 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2D761C433A0; Wed, 31 May 2023 19:37:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685561838; bh=iceGuZ6feTsVTw2tdu2sArnr6g+xi9crr8yJSJGCfJs=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=ag93+kB4q2m9Z1r7ay861w439xgR2KTUUToA+qhghMfokAEkmXg+BukGjBll66mVb 
QphOohNdQ04MMgM+1Uoy7UCVY2O4JEtYQe+qt+0C7t+9OCYOMVc2exPhZHc3L/Hsjp 8ySUDJt7PUx/3+KJHIO7NNzk+oTKB+latzuLT7kf40LOk6FT1VVni05QNCbcy9gipU IwOEj2Zk6Xey+b/s35msDs62jyRroB6mwGRQBeh55fMGIHZZ0b18t2ugcp0uq/WTvB 5EpA4sOlwUjNcfWd54Zc+lP4pRCZZysKsN8C+663KVv+LUvX2mZjmWv5K4TUe4ZEHO AiEJYD0oCTprg== From: Mat Martineau Date: Wed, 31 May 2023 12:37:07 -0700 Subject: [PATCH net 5/6] mptcp: add annotations around sk->sk_shutdown accesses Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20230531-send-net-20230531-v1-5-47750c420571@kernel.org> References: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> In-Reply-To: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , Geliang Tang Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Mat Martineau , Christoph Paasch X-Mailer: b4 0.12.2 From: Paolo Abeni Christoph reported the mptcp variant of a recently addressed plain TCP issue. Similar to commit e14cadfd80d7 ("tcp: add annotations around sk->sk_shutdown accesses") add READ/WRITE ONCE annotations to silence KCSAN reports around lockless sk_shutdown access. 
Fixes: 71ba088ce0aa ("mptcp: cleanup accept and poll") Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/401 Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a7dd7d8c9af2..af54a878ac27 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -603,7 +603,7 @@ static bool mptcp_check_data_fin(struct sock *sk) WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); WRITE_ONCE(msk->rcv_data_fin, 0); =20 - sk->sk_shutdown |=3D RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ =20 switch (sk->sk_state) { @@ -910,7 +910,7 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) /* hopefully temporary hack: propagate shutdown status * to msk, when all subflows agree on it */ - sk->sk_shutdown |=3D RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); =20 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ sk->sk_data_ready(sk); @@ -2526,7 +2526,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *= msk) } =20 inet_sk_state_store(sk, TCP_CLOSE); - sk->sk_shutdown =3D SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); =20 @@ -2958,7 +2958,7 @@ bool __mptcp_close(struct sock *sk, long timeout) bool do_cancel_work =3D false; int subflows_alive =3D 0; =20 - sk->sk_shutdown =3D SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); =20 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_listen_inuse_dec(sk); @@ -3101,7 +3101,7 @@ static int mptcp_disconnect(struct sock *sk, int flag= s) mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); =20 - sk->sk_shutdown =3D 0; + 
WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); return 0; } @@ -3806,9 +3806,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_so= ck *msk) { struct sock *sk =3D (struct sock *)msk; =20 - if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) - return EPOLLOUT | EPOLLWRNORM; - if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; =20 @@ -3826,6 +3823,7 @@ static __poll_t mptcp_poll(struct file *file, struct = socket *sock, struct sock *sk =3D sock->sk; struct mptcp_sock *msk; __poll_t mask =3D 0; + u8 shutdown; int state; =20 msk =3D mptcp_sk(sk); @@ -3842,17 +3840,22 @@ static __poll_t mptcp_poll(struct file *file, struc= t socket *sock, return inet_csk_listen_poll(ssock->sk); } =20 + shutdown =3D READ_ONCE(sk->sk_shutdown); + if (shutdown =3D=3D SHUTDOWN_MASK || state =3D=3D TCP_CLOSE) + mask |=3D EPOLLHUP; + if (shutdown & RCV_SHUTDOWN) + mask |=3D EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + if (state !=3D TCP_SYN_SENT && state !=3D TCP_SYN_RECV) { mask |=3D mptcp_check_readable(msk); - mask |=3D mptcp_check_writeable(msk); + if (shutdown & SEND_SHUTDOWN) + mask |=3D EPOLLOUT | EPOLLWRNORM; + else + mask |=3D mptcp_check_writeable(msk); } else if (state =3D=3D TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* cf tcp_poll() note about TFO */ mask |=3D EPOLLOUT | EPOLLWRNORM; } - if (sk->sk_shutdown =3D=3D SHUTDOWN_MASK || state =3D=3D TCP_CLOSE) - mask |=3D EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |=3D EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; =20 /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); --=20 2.40.1 From nobody Mon Apr 29 01:54:52 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4C14E34CD3; Wed, 31 May 2023 19:37:18 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 884C1C433D2; Wed, 31 
May 2023 19:37:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1685561838; bh=eMhN4xxNMlQU6PkbhzwTVXENtyni6r0Q9UeRaOCqmVE=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=G1pV8Ywn8P/kOE109DImsOOVMPEvaggcIcSNIS7vUwBP8qO7cWb8PCLqohhpZBXxW kXVrNo3iZgBjyqj38LukupQxK2rmaUUzZbaeF9hJoec+S0Ud4fs35LhH1b1JRAf39y z2myDDVle3xAqOcZ5ikN2zHQNN5JJK++I4JxkahKHcNsv7OJNcaJQyn5dfP7Cc20j3 8gAXCgPJRZEJESqzEyHFJMG2r/EDdhI4ovGMc2j7Sb2U/ppVBKUDlmAGM9kw9Eqo2f 4i/I3TMCEZNlj6v2uUcKDvWKxm3LA9CkfgC5rhS3qsFj0BpFFb+4tMU7L/lQLGmpIN dLfeD+y+IBMRw== From: Mat Martineau Date: Wed, 31 May 2023 12:37:08 -0700 Subject: [PATCH net 6/6] mptcp: fix active subflow finalization Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20230531-send-net-20230531-v1-6-47750c420571@kernel.org> References: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> In-Reply-To: <20230531-send-net-20230531-v1-0-47750c420571@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , Geliang Tang Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Mat Martineau , stable@vger.kernel.org X-Mailer: b4 0.12.2 From: Paolo Abeni Active subflows are inserted into the connection list at creation time. When the MPJ handshake completes successfully, a new subflow creation netlink event is generated correctly, but the current code wrongly avoids initializing a couple of subflow data fields. The above will cause misbehavior on a few exceptional events: unneeded mptcp-level retransmission on msk-level sequence wrap-around and infinite mapping fallback even when a MPJ socket is present. Address the issue by factoring out the needed initialization into a new helper and invoking the latter from __mptcp_finish_join() for passive subflows and from mptcp_finish_join() for active ones. 
Fixes: 0530020a7c8f ("mptcp: track and update contiguous data status") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index af54a878ac27..67311e7d5b21 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -825,6 +825,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ss= k) mptcp_data_unlock(sk); } =20 +static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk) +{ + mptcp_subflow_ctx(ssk)->map_seq =3D READ_ONCE(msk->ack_seq); + WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); +} + static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk =3D (struct sock *)msk; @@ -839,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk,= struct sock *ssk) mptcp_sock_graft(ssk, sk->sk_socket); =20 mptcp_sockopt_sync_locked(msk, ssk); + mptcp_subflow_joined(msk, ssk); return true; } =20 @@ -3485,14 +3493,16 @@ bool mptcp_finish_join(struct sock *ssk) return false; } =20 - if (!list_empty(&subflow->node)) - goto out; + /* active subflow, already present inside the conn_list */ + if (!list_empty(&subflow->node)) { + mptcp_subflow_joined(msk, ssk); + return true; + } =20 if (!mptcp_pm_allow_new_subflow(msk)) goto err_prohibited; =20 - /* active connections are already on conn_list. 
- * If we can't acquire msk socket lock here, let the release callback + /* If we can't acquire msk socket lock here, let the release callback * handle it */ mptcp_data_lock(parent); @@ -3515,11 +3525,6 @@ bool mptcp_finish_join(struct sock *ssk) return false; } =20 - subflow->map_seq =3D READ_ONCE(msk->ack_seq); - WRITE_ONCE(msk->allow_infinite_fallback, false); - -out: - mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); return true; } =20 --=20 2.40.1