From nobody Wed Oct 30 19:55:48 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4113A23C90 for ; Thu, 18 May 2023 16:59:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684429165; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=iXcjHjyjTB3JhQ0nyBFQULh4oPJQo+v4lc++HKXxfFo=; b=X9kQZoZvHFpypUrGz8/HyrJXrShQ/Y9ewRdPvDdq8kGcxrJghL2hCfST6EILoGw5IAY6wM 1sOC92enWCb2OEQ9aEBAcVEkYuk4uHX+CW0qXmnEvEca3U9V910Jr2LUokaAIvcZf6fQvo jlPfGMngZSqzBWbZm5RNS9rMPJUZHJs= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-624-gz4PLRi1OmOwB5qWQlgRHw-1; Thu, 18 May 2023 12:59:24 -0400 X-MC-Unique: gz4PLRi1OmOwB5qWQlgRHw-1 Received: from smtp.corp.redhat.com (int-mx09.intmail.prod.int.rdu2.redhat.com [10.11.54.9]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id BAC4B185A78B; Thu, 18 May 2023 16:59:23 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.192.55]) by smtp.corp.redhat.com (Postfix) with ESMTP id 2DDC4492B01; Thu, 18 May 2023 16:59:23 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH v2 mptcp-net 1/5] mptcp: add annotations around msk->subflow accesses Date: Thu, 18 May 2023 18:59:10 +0200 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.9 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" The MPTCP can access the first subflow socket in a few spots outside the socket lock scope. That is actually safe, as MPTCP will delete the socket itself only after the msk sock close(). Still the such accesses causes a few KCSAN splats, as reported by Christoph. Silence the harmless warning adding a few annotation around the relevant accesses. Fixes: 71ba088ce0aa ("mptcp: cleanup accept and poll") Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/402 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau --- net/mptcp/protocol.c | 18 ++++++++++-------- net/mptcp/protocol.h | 6 +++++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 93eac61e7ba7..b96b1191763a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -91,7 +91,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) return err; =20 msk->first =3D ssock->sk; - msk->subflow =3D ssock; + WRITE_ONCE(msk->subflow, ssock); subflow =3D mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); @@ -2309,7 +2309,7 @@ static void mptcp_dispose_initial_subflow(struct mptc= p_sock *msk) { if (msk->subflow) { iput(SOCK_INODE(msk->subflow)); - msk->subflow =3D NULL; + WRITE_ONCE(msk->subflow, NULL); } } =20 @@ -3184,7 +3184,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk =3D mptcp_sk(nsk); msk->local_key =3D subflow_req->local_key; msk->token =3D subflow_req->token; - msk->subflow =3D NULL; + WRITE_ONCE(msk->subflow, NULL); msk->in_accept_queue =3D 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) @@ -3233,7 +3233,7 @@ static struct sock *mptcp_accept(struct sock *sk, int= flags, int *err, struct socket *listener; struct sock *newsk; =20 - listener =3D msk->subflow; + listener =3D READ_ONCE(msk->subflow); if (WARN_ON_ONCE(!listener)) { *err =3D -EINVAL; return NULL; @@ -3784,10 +3784,10 @@ static int mptcp_stream_accept(struct socket *sock,= struct socket *newsock, =20 pr_debug("msk=3D%p", msk); =20 - /* buggy applications can call accept on socket states other then LISTEN + /* Buggy applications can call accept on socket states other then LISTEN * but no need to allocate the first subflow just to error out. */ - ssock =3D msk->subflow; + ssock =3D READ_ONCE(msk->subflow); if (!ssock) return -EINVAL; =20 @@ -3863,10 +3863,12 @@ static __poll_t mptcp_poll(struct file *file, struc= t socket *sock, state =3D inet_sk_state_load(sk); pr_debug("msk=3D%p state=3D%d flags=3D%lx", msk, state, msk->flags); if (state =3D=3D TCP_LISTEN) { - if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk)) + struct socket *ssock =3D READ_ONCE(msk->subflow); + + if (WARN_ON_ONCE(!ssock || !ssock->sk)) return 0; =20 - return inet_csk_listen_poll(msk->subflow->sk); + return inet_csk_listen_poll(ssock->sk); } =20 if (state !=3D TCP_SYN_SENT && state !=3D TCP_SYN_RECV) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1e8effe395d8..552d7b06aaa9 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -304,7 +304,11 @@ struct mptcp_sock { struct list_head rtx_queue; struct mptcp_data_frag *first_pending; struct list_head join_list; - struct socket *subflow; /* outgoing connect/listener/!mp_capable */ + struct socket *subflow; /* outgoing connect/listener/!mp_capable + * The mptcp ops can safely dereference, using suitable + * ONCE annotation, the subflow outside the socket + * lock as such sock is freed after close(). + */ struct sock *first; struct mptcp_pm_data pm; struct mptcp_sched_ops *sched; --=20 2.40.1 From nobody Wed Oct 30 19:55:48 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 26A0B24E84 for ; Thu, 18 May 2023 16:59:27 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684429166; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=KFGJuxqD8VGO+kTq8gpWuYToWRI7nZ42EQL2yG0TTsk=; b=J+qc+ubwu+GsaKRcwUeILxpPG0LaJ0MlZslbkTzLs5R0WJLFBWHiq4tsP5f6QI9bpWeCpo dAhu5uRgQQ8tOkWy362ltWIcYJv47KqIFm9lVkNlKz1tvI1yYVFn/S1/pH5a0PhnV4V6XZ dWpPoWNVAmh3BgXcJ0R3pYy0vK34gEQ= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-487-ESnLC7YGOLCvBjqS1bpZ_A-1; Thu, 18 May 2023 12:59:24 -0400 X-MC-Unique: ESnLC7YGOLCvBjqS1bpZ_A-1 Received: from smtp.corp.redhat.com (int-mx09.intmail.prod.int.rdu2.redhat.com [10.11.54.9]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 954E6101A552; Thu, 18 May 2023 16:59:24 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.192.55]) by smtp.corp.redhat.com (Postfix) with ESMTP id 0AEFE492B01; Thu, 18 May 2023 16:59:23 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH v2 mptcp-net 2/5] mptcp: consolidate passive msk socket initialization Date: Thu, 18 May 2023 18:59:11 +0200 Message-Id: <5c6c015e49545ca18e2751c5316e991ccf35d0b5.1684427027.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.9 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" When the msk socket is cloned at MPC handshake time, a few fields are initializated in a racy way outside mptcp_sk_clone() and the msk socket lock. The above is due historical reasons: before commit a88d0092b24b ("mptcp: simplify subflow_syn_recv_sock()") as the first subflow socket carrying all the needed date was not available yet at msk creation time We can now refactor the code moving the missing initialization bit under the socket lock, removing the init race and avoiding some code duplication. This will also simplify the next patch, as all msk->first write access are now under the msk socket lock. Fixes: 0397c6d85f9c ("mptcp: keep unaccepted MPC subflow into join list") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau --- v1 -> v2: - make mptcp_copy_inaddrs() static, fixing a W=3D1 compiler warning --- net/mptcp/protocol.c | 35 ++++++++++++++++++++++++++++------- net/mptcp/protocol.h | 8 ++++---- net/mptcp/subflow.c | 28 +--------------------------- 3 files changed, 33 insertions(+), 38 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b96b1191763a..38709c332367 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3086,7 +3086,7 @@ static void mptcp_close(struct sock *sk, long timeout) sock_put(sk); } =20 -void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) +static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) const struct ipv6_pinfo *ssk6 =3D inet6_sk(ssk); @@ -3163,9 +3163,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struc= t sock *sk) } #endif =20 -struct sock *mptcp_sk_clone(const struct sock *sk, - const struct mptcp_options_received *mp_opt, - struct request_sock *req) +struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, + struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req =3D mptcp_subflow_rsk(req); struct sock *nsk =3D sk_clone_lock(sk, GFP_ATOMIC); @@ -3198,10 +3199,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk, mptcp_init_sched(msk, mptcp_sk(sk)->sched); =20 sock_reset_flag(nsk, SOCK_RCU_FREE); - /* will be fully established after successful MPC subflow creation */ - inet_sk_state_store(nsk, TCP_SYN_RECV); - security_inet_csk_clone(nsk, req); + + /* this can't race with mptcp_close(), as the msk is + * not yet exposted to user-space + */ + inet_sk_state_store(nsk, TCP_ESTABLISHED); + + /* The msk maintain a referece to each subflow in the connections list */ + WRITE_ONCE(msk->first, ssk); + list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list); + sock_hold(ssk); + + /* new mpc subflow takes ownership of the newly + * created mptcp socket + */ + mptcp_token_accept(subflow_req, msk); + + /* set msk addresses early to ensure mptcp_pm_get_local_id() + * uses the correct data + */ + mptcp_copy_inaddrs(nsk, ssk); + mptcp_propagate_sndbuf(nsk, ssk); + + mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(nsk); =20 /* note: the newly allocated socket refcount is 2 now */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 552d7b06aaa9..de94c01746dc 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -618,7 +618,6 @@ int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); -void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); @@ -702,9 +701,10 @@ void __init mptcp_proto_init(void); int __init mptcp_proto_v6_init(void); #endif =20 -struct sock *mptcp_sk_clone(const struct sock *sk, - const struct mptcp_options_received *mp_opt, - struct request_sock *req); +struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, + struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); =20 diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 76952cf74fc0..63ac4dc621d4 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -815,38 +815,12 @@ static struct sock *subflow_syn_recv_sock(const struc= t sock *sk, ctx->setsockopt_seq =3D listener->setsockopt_seq; =20 if (ctx->mp_capable) { - ctx->conn =3D mptcp_sk_clone(listener->conn, &mp_opt, req); + ctx->conn =3D mptcp_sk_clone_init(listener->conn, &mp_opt, child, req); if (!ctx->conn) goto fallback; =20 owner =3D mptcp_sk(ctx->conn); - - /* this can't race with mptcp_close(), as the msk is - * not yet exposted to user-space - */ - inet_sk_state_store(ctx->conn, TCP_ESTABLISHED); - - /* record the newly created socket as the first msk - * subflow, but don't link it yet into conn_list - */ - WRITE_ONCE(owner->first, child); - - /* new mpc subflow takes ownership of the newly - * created mptcp socket - */ - owner->setsockopt_seq =3D ctx->setsockopt_seq; mptcp_pm_new_connection(owner, child, 1); - mptcp_token_accept(subflow_req, owner); - - /* set msk addresses early to ensure mptcp_pm_get_local_id() - * uses the correct data - */ - mptcp_copy_inaddrs(ctx->conn, child); - mptcp_propagate_sndbuf(ctx->conn, child); - - mptcp_rcv_space_init(owner, child); - list_add(&ctx->node, &owner->conn_list); - sock_hold(child); =20 /* with OoO packets we can reach here without ingress * mpc option --=20 2.40.1 From nobody Wed Oct 30 19:55:48 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4931123C90 for ; Thu, 18 May 2023 16:59:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684429167; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=5V4K9Ey6dTVLmr0bHqWYyL1al2kiA7jeAwB2tAlPqhg=; b=IBZMAx5Yz0EjJ8U/9Gjyc2VnC2lS42PRQqakF6wSh7eiS03FKSoRpkvy6ERQ+iY9J9jWSY JMvyTlOEqoUC5Exw0DMo6I3nox6BnX4B8XEmI7UxmW/bfG3YcPp79ry31/A7qyAQBNWYI6 mw1mMDmAKxtXmI4nMyoXSiWOmeYXp+4= Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-246-97bh-G_cPkCa-PbWfi9cCg-1; Thu, 18 May 2023 12:59:25 -0400 X-MC-Unique: 97bh-G_cPkCa-PbWfi9cCg-1 Received: from smtp.corp.redhat.com (int-mx09.intmail.prod.int.rdu2.redhat.com [10.11.54.9]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 73E3D3806707; Thu, 18 May 2023 16:59:25 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.192.55]) by smtp.corp.redhat.com (Postfix) with ESMTP id D7CA3492B01; Thu, 18 May 2023 16:59:24 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH v2 mptcp-net 3/5] mptcp: fix data race around msk->first access Date: Thu, 18 May 2023 18:59:12 +0200 Message-Id: <34cb8d251ae3042e904a7712f207521f351bb294.1684427027.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.9 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" The first subflow socket is accessed outside the msk socket lock by mptcp_subflow_fail(), we need to annotate each write access with WRITE_ONCE, but a few spots still lacks it. Fixes: 76a13b315709 ("mptcp: invoke MP_FAIL response when needed") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau --- net/mptcp/protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 38709c332367..cea9992fec98 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -90,7 +90,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) if (err) return err; =20 - msk->first =3D ssock->sk; + WRITE_ONCE(msk->first, ssock->sk); WRITE_ONCE(msk->subflow, ssock); subflow =3D mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); @@ -2446,7 +2446,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct= sock *ssk, sock_put(ssk); =20 if (ssk =3D=3D msk->first) - msk->first =3D NULL; + WRITE_ONCE(msk->first, NULL); =20 out: if (ssk =3D=3D msk->last_snd) @@ -2762,7 +2762,7 @@ static int __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival =3D TCP_RTO_MIN; =20 - msk->first =3D NULL; + WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss =3D mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); WRITE_ONCE(msk->allow_infinite_fallback, true); --=20 2.40.1 From nobody Wed Oct 30 19:55:48 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DBA8F24E8C for ; Thu, 18 May 2023 16:59:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684429167; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=oxPJ2/8gRAFtd58/qPb4zk3Rzvir5sID78RnbQJ6xH0=; b=NKSmoE5o75xFfvn5lp72DKfG+RVVGgs6SeSkCLHYjVyLUMUKsD/qezhOrfOK7WKViYM/LF sA0lxOjUxX+EQPsclafxttFxjh9lmmnEhIR+TthjqUSeda26jWHjkh+Bs6PXC+T1z/dfZZ zthqLJqFvdYowBGKJFq/zGWBa2Ke0K8= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-516-sYVdrRd6PaKWpgWRvI1HdQ-1; Thu, 18 May 2023 12:59:26 -0400 X-MC-Unique: sYVdrRd6PaKWpgWRvI1HdQ-1 Received: from smtp.corp.redhat.com (int-mx09.intmail.prod.int.rdu2.redhat.com [10.11.54.9]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 541C5185A78B; Thu, 18 May 2023 16:59:26 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.192.55]) by smtp.corp.redhat.com (Postfix) with ESMTP id B830A492B01; Thu, 18 May 2023 16:59:25 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH v2 mptcp-net 4/5] mptcp: add annotations around sk->sk_shutdown accesses Date: Thu, 18 May 2023 18:59:13 +0200 Message-Id: <2265cd91d7b8e011386b86aae848a603daae4cb9.1684427027.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.9 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" Christoph reported the mptcp variant of a recently addressed plain TCP issue. Similar to commit e14cadfd80d7 ("tcp: add annotations around sk->sk_shutdown accesses") add READ/WRITE ONCE annotations to silence KCSAN reports around lockless sk_shutdown access. Fixes: 71ba088ce0aa ("mptcp: cleanup accept and poll") Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/401 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau --- net/mptcp/protocol.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cea9992fec98..4b24f3bc6919 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -603,7 +603,7 @@ static bool mptcp_check_data_fin(struct sock *sk) WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); WRITE_ONCE(msk->rcv_data_fin, 0); =20 - sk->sk_shutdown |=3D RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ =20 switch (sk->sk_state) { @@ -910,7 +910,7 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) /* hopefully temporary hack: propagate shutdown status * to msk, when all subflows agree on it */ - sk->sk_shutdown |=3D RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); =20 smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ sk->sk_data_ready(sk); @@ -2553,7 +2553,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *= msk) } =20 inet_sk_state_store(sk, TCP_CLOSE); - sk->sk_shutdown =3D SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); =20 @@ -3006,7 +3006,7 @@ bool __mptcp_close(struct sock *sk, long timeout) bool do_cancel_work =3D false; int subflows_alive =3D 0; =20 - sk->sk_shutdown =3D SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); =20 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_listen_inuse_dec(sk); @@ -3149,7 +3149,7 @@ static int mptcp_disconnect(struct sock *sk, int flag= s) mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); =20 - sk->sk_shutdown =3D 0; + WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); return 0; } @@ -3856,9 +3856,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_so= ck *msk) { struct sock *sk =3D (struct sock *)msk; =20 - if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) - return EPOLLOUT | EPOLLWRNORM; - if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; =20 @@ -3876,6 +3873,7 @@ static __poll_t mptcp_poll(struct file *file, struct = socket *sock, struct sock *sk =3D sock->sk; struct mptcp_sock *msk; __poll_t mask =3D 0; + u8 shutdown; int state; =20 msk =3D mptcp_sk(sk); @@ -3892,17 +3890,22 @@ static __poll_t mptcp_poll(struct file *file, struc= t socket *sock, return inet_csk_listen_poll(ssock->sk); } =20 + shutdown =3D READ_ONCE(sk->sk_shutdown); + if (shutdown =3D=3D SHUTDOWN_MASK || state =3D=3D TCP_CLOSE) + mask |=3D EPOLLHUP; + if (shutdown & RCV_SHUTDOWN) + mask |=3D EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + if (state !=3D TCP_SYN_SENT && state !=3D TCP_SYN_RECV) { mask |=3D mptcp_check_readable(msk); - mask |=3D mptcp_check_writeable(msk); + if (shutdown & SEND_SHUTDOWN) + mask |=3D EPOLLOUT | EPOLLWRNORM; + else + mask |=3D mptcp_check_writeable(msk); } else if (state =3D=3D TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* cf tcp_poll() note about TFO */ mask |=3D EPOLLOUT | EPOLLWRNORM; } - if (sk->sk_shutdown =3D=3D SHUTDOWN_MASK || state =3D=3D TCP_CLOSE) - mask |=3D EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |=3D EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; =20 /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); --=20 2.40.1 From nobody Wed Oct 30 19:55:48 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C7CE524E86 for ; Thu, 18 May 2023 16:59:29 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684429168; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=adwPBPHdnk1QIyvCuHO9SxciI8J1l73qF4LWlJNqDxc=; b=EF81KtsaOzLrH/VMux/3cifvyqf+BMYXOooaOLiWshG6l77k6Us/bNK7hYp6C+75hu0TqV YNRXp0NbguaUofBqFgjw1r5wWVgWppjcHNHKNWqo/we/RabHlsJDN6HdrMkAX23Otaejxn mqFJr07qfEKrSn+pZ7/0J5qCZEuGTfM= Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-177-xB-ycCkAOjKzNb_S9UDSYg-1; Thu, 18 May 2023 12:59:27 -0400 X-MC-Unique: xB-ycCkAOjKzNb_S9UDSYg-1 Received: from smtp.corp.redhat.com (int-mx09.intmail.prod.int.rdu2.redhat.com [10.11.54.9]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 38E2E29AB3F1; Thu, 18 May 2023 16:59:27 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.192.55]) by smtp.corp.redhat.com (Postfix) with ESMTP id 97ABA492B01; Thu, 18 May 2023 16:59:26 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Christoph Paasch Subject: [PATCH v2 mptcp-net 5/5] mptcp: fix active subflow finalization. Date: Thu, 18 May 2023 18:59:14 +0200 Message-Id: <41d7df9d8669b5fbfe70cd5551efc9245007ae6c.1684427027.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.9 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" Active subflow are inserted into the connection list at creation time. When the MPJ handshake completes succesfully, a the new subflow creation netlink event is generated correctly, but the current code wrongly avoid initializing a couple of subflow data. The above will cause misbehavior on a few exceptional events: unneeded mptcp-level retransmission on msk-level sequence wrap-around and infinite mapping fallback even when a MPJ socket is present. Address the issue factoring out the needed initialization in a new helper and invoking the latter from __mptcp_finish_join() time for passive subflow and from mptcp_finish_join() for active ones. Fixes: 0530020a7c8f ("mptcp: track and update contiguous data status") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau --- net/mptcp/protocol.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4b24f3bc6919..28da6a9fe8fd 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -825,6 +825,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ss= k) mptcp_data_unlock(sk); } =20 +static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk) +{ + mptcp_subflow_ctx(ssk)->map_seq =3D READ_ONCE(msk->ack_seq); + WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); +} + static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk =3D (struct sock *)msk; @@ -839,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk,= struct sock *ssk) mptcp_sock_graft(ssk, sk->sk_socket); =20 mptcp_sockopt_sync_locked(msk, ssk); + mptcp_subflow_joined(msk, ssk); return true; } =20 @@ -3532,14 +3540,16 @@ bool mptcp_finish_join(struct sock *ssk) return false; } =20 - if (!list_empty(&subflow->node)) - goto out; + /* active subflow, already present inside the conn_list */ + if (!list_empty(&subflow->node)) { + mptcp_subflow_joined(msk, ssk); + return true; + } =20 if (!mptcp_pm_allow_new_subflow(msk)) goto err_prohibited; =20 - /* active connections are already on conn_list. - * If we can't acquire msk socket lock here, let the release callback + /* If we can't acquire msk socket lock here, let the release callback * handle it */ mptcp_data_lock(parent); @@ -3562,11 +3572,6 @@ bool mptcp_finish_join(struct sock *ssk) return false; } =20 - subflow->map_seq =3D READ_ONCE(msk->ack_seq); - WRITE_ONCE(msk->allow_infinite_fallback, false); - -out: - mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); return true; } =20 --=20 2.40.1