From nobody Sat Apr 20 00:27:26 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8DA6A211C for ; Wed, 17 May 2023 11:35:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684323345; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=iXcjHjyjTB3JhQ0nyBFQULh4oPJQo+v4lc++HKXxfFo=; b=f4JiUES6uI+n+3vu7EWQ4LdEghXXmoHClWPEIkDD1ydnIXV1bQCJW676tMbO+UywTanRaH V6rJyRRXXTifaa9YkERQqntpW25i0ESZu/c3rvuc9Qbx70jSKrHbcTiRG86agIN5nxZeiT gJnGkQzODZcWf/wmzIFXua1X+otssOA= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-442-HuaoqkNwMKi0YXxjGvOMAg-1; Wed, 17 May 2023 07:35:44 -0400 X-MC-Unique: HuaoqkNwMKi0YXxjGvOMAg-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id C7BEB185A78B; Wed, 17 May 2023 11:35:43 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.56]) by smtp.corp.redhat.com (Postfix) with ESMTP id 3336B63F8F; Wed, 17 May 2023 11:35:43 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH mptcp-net 1/4] mptcp: add annotations around msk->subflow accesses Date: Wed, 17 May 2023 13:35:33 +0200 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" The MPTCP can access the first subflow socket in a few spots outside the socket lock scope. That is actually safe, as MPTCP will delete the socket itself only after the msk sock close(). Still the such accesses causes a few KCSAN splats, as reported by Christoph. Silence the harmless warning adding a few annotation around the relevant accesses. Fixes: 71ba088ce0aa ("mptcp: cleanup accept and poll") Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/402 Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 18 ++++++++++-------- net/mptcp/protocol.h | 6 +++++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 93eac61e7ba7..b96b1191763a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -91,7 +91,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) return err; =20 msk->first =3D ssock->sk; - msk->subflow =3D ssock; + WRITE_ONCE(msk->subflow, ssock); subflow =3D mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); @@ -2309,7 +2309,7 @@ static void mptcp_dispose_initial_subflow(struct mptc= p_sock *msk) { if (msk->subflow) { iput(SOCK_INODE(msk->subflow)); - msk->subflow =3D NULL; + WRITE_ONCE(msk->subflow, NULL); } } =20 @@ -3184,7 +3184,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk =3D mptcp_sk(nsk); msk->local_key =3D subflow_req->local_key; msk->token =3D subflow_req->token; - msk->subflow =3D NULL; + WRITE_ONCE(msk->subflow, NULL); msk->in_accept_queue =3D 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) @@ -3233,7 +3233,7 @@ static struct sock *mptcp_accept(struct sock *sk, int= flags, int *err, struct socket *listener; struct sock *newsk; =20 - listener =3D msk->subflow; + listener =3D READ_ONCE(msk->subflow); if (WARN_ON_ONCE(!listener)) { *err =3D -EINVAL; return NULL; @@ -3784,10 +3784,10 @@ static int mptcp_stream_accept(struct socket *sock,= struct socket *newsock, =20 pr_debug("msk=3D%p", msk); =20 - /* buggy applications can call accept on socket states other then LISTEN + /* Buggy applications can call accept on socket states other then LISTEN * but no need to allocate the first subflow just to error out. */ - ssock =3D msk->subflow; + ssock =3D READ_ONCE(msk->subflow); if (!ssock) return -EINVAL; =20 @@ -3863,10 +3863,12 @@ static __poll_t mptcp_poll(struct file *file, struc= t socket *sock, state =3D inet_sk_state_load(sk); pr_debug("msk=3D%p state=3D%d flags=3D%lx", msk, state, msk->flags); if (state =3D=3D TCP_LISTEN) { - if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk)) + struct socket *ssock =3D READ_ONCE(msk->subflow); + + if (WARN_ON_ONCE(!ssock || !ssock->sk)) return 0; =20 - return inet_csk_listen_poll(msk->subflow->sk); + return inet_csk_listen_poll(ssock->sk); } =20 if (state !=3D TCP_SYN_SENT && state !=3D TCP_SYN_RECV) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1e8effe395d8..552d7b06aaa9 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -304,7 +304,11 @@ struct mptcp_sock { struct list_head rtx_queue; struct mptcp_data_frag *first_pending; struct list_head join_list; - struct socket *subflow; /* outgoing connect/listener/!mp_capable */ + struct socket *subflow; /* outgoing connect/listener/!mp_capable + * The mptcp ops can safely dereference, using suitable + * ONCE annotation, the subflow outside the socket + * lock as such sock is freed after close(). + */ struct sock *first; struct mptcp_pm_data pm; struct mptcp_sched_ops *sched; --=20 2.40.1 From nobody Sat Apr 20 00:27:26 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 73B39168C9 for ; Wed, 17 May 2023 11:35:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684323346; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=wLDIy2xUKyzmEKJlhL6j+wFHnDWBJCqj63Dim57xp+Q=; b=SM/9KVQhaLoR+Rez5Sl4OvCmpb2Bbtmiea58fkBaSJXLBK4ydag2cPvWduPQLs3WYjyaNM 7eHehlA/gHFR4p66YKlUZF+4tuIl8ZACIJdbYAjx1hATVqwqPRuJd9G2tshRLyCnAPLWLO sXXNfW+1gKrgQCHJNWlxSzYDBPeA/AU= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-81-C5ylEF_rPHK5m_7dnN8zuA-1; Wed, 17 May 2023 07:35:45 -0400 X-MC-Unique: C5ylEF_rPHK5m_7dnN8zuA-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id A8A108037A8; Wed, 17 May 2023 11:35:44 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.56]) by smtp.corp.redhat.com (Postfix) with ESMTP id 18D2D63F5B; Wed, 17 May 2023 11:35:43 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH mptcp-net 2/4] mptcp: consolidate passive msk socket initialization Date: Wed, 17 May 2023 13:35:34 +0200 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" When the msk socket is cloned at MPC handshake time, a few fields are initializated in a racy way outside mptcp_sk_clone() and the msk socket lock. The above is due historical reasons: before commit a88d0092b24b ("mptcp: simplify subflow_syn_recv_sock()") as the first subflow socket carrying all the needed date was not available yet at msk creation time We can now refactor the code moving the missing initialization bit under the socket lock, removing the init race and avoiding some code duplication. This will also simplify the next patch, as all msk->first write access are now under the msk socket lock. Fixes: 0397c6d85f9c ("mptcp: keep unaccepted MPC subflow into join list") Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 33 +++++++++++++++++++++++++++------ net/mptcp/protocol.h | 8 ++++---- net/mptcp/subflow.c | 28 +--------------------------- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b96b1191763a..55db12cf7ccb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3163,9 +3163,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struc= t sock *sk) } #endif =20 -struct sock *mptcp_sk_clone(const struct sock *sk, - const struct mptcp_options_received *mp_opt, - struct request_sock *req) +struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, + struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req =3D mptcp_subflow_rsk(req); struct sock *nsk =3D sk_clone_lock(sk, GFP_ATOMIC); @@ -3198,10 +3199,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk, mptcp_init_sched(msk, mptcp_sk(sk)->sched); =20 sock_reset_flag(nsk, SOCK_RCU_FREE); - /* will be fully established after successful MPC subflow creation */ - inet_sk_state_store(nsk, TCP_SYN_RECV); - security_inet_csk_clone(nsk, req); + + /* this can't race with mptcp_close(), as the msk is + * not yet exposted to user-space + */ + inet_sk_state_store(nsk, TCP_ESTABLISHED); + + /* The msk maintain a referece to each subflow in the connections list */ + WRITE_ONCE(msk->first, ssk); + list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list); + sock_hold(ssk); + + /* new mpc subflow takes ownership of the newly + * created mptcp socket + */ + mptcp_token_accept(subflow_req, msk); + + /* set msk addresses early to ensure mptcp_pm_get_local_id() + * uses the correct data + */ + mptcp_copy_inaddrs(nsk, ssk); + mptcp_propagate_sndbuf(nsk, ssk); + + mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(nsk); =20 /* note: the newly allocated socket refcount is 2 now */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 552d7b06aaa9..de94c01746dc 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -618,7 +618,6 @@ int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); -void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); @@ -702,9 +701,10 @@ void __init mptcp_proto_init(void); int __init mptcp_proto_v6_init(void); #endif =20 -struct sock *mptcp_sk_clone(const struct sock *sk, - const struct mptcp_options_received *mp_opt, - struct request_sock *req); +struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, + struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); =20 diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 76952cf74fc0..63ac4dc621d4 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -815,38 +815,12 @@ static struct sock *subflow_syn_recv_sock(const struc= t sock *sk, ctx->setsockopt_seq =3D listener->setsockopt_seq; =20 if (ctx->mp_capable) { - ctx->conn =3D mptcp_sk_clone(listener->conn, &mp_opt, req); + ctx->conn =3D mptcp_sk_clone_init(listener->conn, &mp_opt, child, req); if (!ctx->conn) goto fallback; =20 owner =3D mptcp_sk(ctx->conn); - - /* this can't race with mptcp_close(), as the msk is - * not yet exposted to user-space - */ - inet_sk_state_store(ctx->conn, TCP_ESTABLISHED); - - /* record the newly created socket as the first msk - * subflow, but don't link it yet into conn_list - */ - WRITE_ONCE(owner->first, child); - - /* new mpc subflow takes ownership of the newly - * created mptcp socket - */ - owner->setsockopt_seq =3D ctx->setsockopt_seq; mptcp_pm_new_connection(owner, child, 1); - mptcp_token_accept(subflow_req, owner); - - /* set msk addresses early to ensure mptcp_pm_get_local_id() - * uses the correct data - */ - mptcp_copy_inaddrs(ctx->conn, child); - mptcp_propagate_sndbuf(ctx->conn, child); - - mptcp_rcv_space_init(owner, child); - list_add(&ctx->node, &owner->conn_list); - sock_hold(child); =20 /* with OoO packets we can reach here without ingress * mpc option --=20 2.40.1 From nobody Sat Apr 20 00:27:26 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 08D4B2C739 for ; Wed, 17 May 2023 11:35:49 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684323349; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=mvbWvWeCGg7OLtCob8tBk2leVJPwRYAyb2yBU4ji2xM=; b=FHvmZD/JK3huEE/SZE00HNkRDPaZpBOpaAA3oV6d9+UwwdhkIjJxiMnJgiDlxvl8BIdzl8 XSpCqixUPIBRz4VVk0RN9fHKWlSZTKAJWgdYmNFtFqm9F5LihlMI6gVr3fqcpFOoOCI87Y 0sZmEOLsdQwodnHnAQw1Bqeaob+753I= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-365-CI6z4OgxMCumD1PQhzUxWA-1; Wed, 17 May 2023 07:35:45 -0400 X-MC-Unique: CI6z4OgxMCumD1PQhzUxWA-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 87F96185A790; Wed, 17 May 2023 11:35:45 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.56]) by smtp.corp.redhat.com (Postfix) with ESMTP id ED25563F8E; Wed, 17 May 2023 11:35:44 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH mptcp-net 3/4] mptcp: fix data race around msk->first access Date: Wed, 17 May 2023 13:35:35 +0200 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" The first subflow socket is accessed outside the msk socket lock by mptcp_subflow_fail(), we need to annotate each write access with WRITE_ONCE, but a few spots still lacks it. Fixes: 76a13b315709 ("mptcp: invoke MP_FAIL response when needed") Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 55db12cf7ccb..379ae468d755 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -90,7 +90,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) if (err) return err; =20 - msk->first =3D ssock->sk; + WRITE_ONCE(msk->first, ssock->sk); WRITE_ONCE(msk->subflow, ssock); subflow =3D mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); @@ -2446,7 +2446,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct= sock *ssk, sock_put(ssk); =20 if (ssk =3D=3D msk->first) - msk->first =3D NULL; + WRITE_ONCE(msk->first, NULL); =20 out: if (ssk =3D=3D msk->last_snd) @@ -2762,7 +2762,7 @@ static int __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival =3D TCP_RTO_MIN; =20 - msk->first =3D NULL; + WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss =3D mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); WRITE_ONCE(msk->allow_infinite_fallback, true); --=20 2.40.1 From nobody Sat Apr 20 00:27:26 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E32BB168C9 for ; Wed, 17 May 2023 11:35:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684323349; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=kcsuNVRKbJOO8K51jJnXzwFWg2sNweGKXoA7u8+SAkc=; b=aNeDRm1fm5jZFr/HQZemHtds1uTa1sd/3maj5JulqgCQY54IWIT8OLxWSmEKTHCz9E+out vBGZv7Gu8X/J3yNUahuX57NHNwAIznIV5qH+tgYUW83hxMYMfOYPS01FECTKdmVemMwmIa einy+pxBRGGWUSGD9YnnF49AWP2Nj8E= Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-368-J_BwL1FjNVOLOhzJamzxKQ-1; Wed, 17 May 2023 07:35:46 -0400 X-MC-Unique: J_BwL1FjNVOLOhzJamzxKQ-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 67E5D3815F65; Wed, 17 May 2023 11:35:46 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.56]) by smtp.corp.redhat.com (Postfix) with ESMTP id CC9E163F7E; Wed, 17 May 2023 11:35:45 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH mptcp-net 4/4] mptcp: add annotations around sk->sk_shutdown accesses Date: Wed, 17 May 2023 13:35:36 +0200 Message-Id: <4ea2e7aeea3fdfdaeaca9aa431ca66df205f68fb.1684321532.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" Christoph reported the mptcp variant of a recently addressed plain TCP issue. Similar to commit e14cadfd80d7 ("tcp: add annotations around sk->sk_shutdown accesses") add READ/WRITE ONCE annotations to silence KCSAN reports around lockless sk_shutdown access. Fixes: 71ba088ce0aa ("mptcp: cleanup accept and poll") Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/401 Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 379ae468d755..21c19a0e0dfb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -603,7 +603,7 @@ static bool mptcp_check_data_fin(struct sock *sk) WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); WRITE_ONCE(msk->rcv_data_fin, 0); =20 - sk->sk_shutdown |=3D RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ =20 switch (sk->sk_state) { @@ -910,7 +910,7 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) /* hopefully temporary hack: propagate shutdown status * to msk, when all subflows agree on it */ - sk->sk_shutdown |=3D RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); =20 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ sk->sk_data_ready(sk); @@ -2553,7 +2553,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *= msk) } =20 inet_sk_state_store(sk, TCP_CLOSE); - sk->sk_shutdown =3D SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); =20 @@ -3006,7 +3006,7 @@ bool __mptcp_close(struct sock *sk, long timeout) bool do_cancel_work =3D false; int subflows_alive =3D 0; =20 - sk->sk_shutdown =3D SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); =20 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_listen_inuse_dec(sk); @@ -3149,7 +3149,7 @@ static int mptcp_disconnect(struct sock *sk, int flag= s) mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); =20 - sk->sk_shutdown =3D 0; + WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); return 0; } @@ -3856,9 +3856,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_so= ck *msk) { struct sock *sk =3D (struct sock *)msk; =20 - if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) - return EPOLLOUT | EPOLLWRNORM; - if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; =20 @@ -3876,6 +3873,7 @@ static __poll_t mptcp_poll(struct file *file, struct = socket *sock, struct sock *sk =3D sock->sk; struct mptcp_sock *msk; __poll_t mask =3D 0; + u8 shutdown; int state; =20 msk =3D mptcp_sk(sk); @@ -3892,17 +3890,22 @@ static __poll_t mptcp_poll(struct file *file, struc= t socket *sock, return inet_csk_listen_poll(ssock->sk); } =20 + shutdown =3D READ_ONCE(sk->sk_shutdown); + if (shutdown =3D=3D SHUTDOWN_MASK || state =3D=3D TCP_CLOSE) + mask |=3D EPOLLHUP; + if (shutdown & RCV_SHUTDOWN) + mask |=3D EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + if (state !=3D TCP_SYN_SENT && state !=3D TCP_SYN_RECV) { mask |=3D mptcp_check_readable(msk); - mask |=3D mptcp_check_writeable(msk); + if (shutdown & SEND_SHUTDOWN) + mask |=3D EPOLLOUT | EPOLLWRNORM; + else + mask |=3D mptcp_check_writeable(msk); } else if (state =3D=3D TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* cf tcp_poll() note about TFO */ mask |=3D EPOLLOUT | EPOLLWRNORM; } - if (sk->sk_shutdown =3D=3D SHUTDOWN_MASK || state =3D=3D TCP_CLOSE) - mask |=3D EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |=3D EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; =20 /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); --=20 2.40.1