From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0C2C1AD55 for ; Fri, 13 Jan 2023 18:21:16 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1673634076; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=crnLFwiuGnAVoBbhcD9j2SENj04mwlUFDL73n8KsthI=; b=HuYage9hCWM0evOLOc75VNYJthjhCVemYG6DKl8LCSt51DhhWUc7oPwPMo/cbbVMBMpNB3 mf7stIN/iK2rMvgHvOWYW650nudP8nPpXnYL6q7p1PC2JVKRtSQ35r/MoUd9pJ520MNwBi gj84O5nKClPCy+W/h1aawOac7yEFUzQ= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-125-tBymzAojOHm_akWgJnL4DQ-1; Fri, 13 Jan 2023 13:21:06 -0500 X-MC-Unique: tBymzAojOHm_akWgJnL4DQ-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 988CE802D2A for ; Fri, 13 Jan 2023 18:21:06 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id 2595E4078903 for ; Fri, 13 Jan 2023 18:21:06 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 1/9] mptcp: refactor passive socket initialization. 
Date: Fri, 13 Jan 2023 19:20:48 +0100 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" After commit 30e51b923e43 ("mptcp: fix unreleased socket in accept queue") unaccepted msk sockets go through complete shutdown, so we no longer need to delay inserting the first subflow into the subflow lists. The reference counting deserves some extra care, as __mptcp_close() is unaware of the request socket linkage to the first subflow. Signed-off-by: Paolo Abeni --- Notes: - this schema assumes that the TCP code will never drop a request socket from the receive queue after the 3whs. I tried to verify such assumption as strictly as I could, but more eyes are more than welcome! - this will cause pktdrill failure for close_before_accept.pkt, because the msk will become fully established before accept() - imho a good thing - and send out add_addr earlier. The pktdrill test change should be easier. 
--- net/mptcp/protocol.c | 17 ----------------- net/mptcp/subflow.c | 31 ++++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 003b44a79fce..d298d629b3b2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk,= struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); =20 - mptcp_propagate_sndbuf((struct sock *)msk, ssk); mptcp_sockopt_sync_locked(msk, ssk); return true; } @@ -3753,22 +3752,6 @@ static int mptcp_stream_accept(struct socket *sock, = struct socket *newsock, =20 lock_sock(newsk); =20 - /* PM/worker can now acquire the first subflow socket - * lock without racing with listener queue cleanup, - * we can notify it, if needed. - * - * Even if remote has reset the initial subflow by now - * the refcnt is still at least one. - */ - subflow =3D mptcp_subflow_ctx(msk->first); - list_add(&subflow->node, &msk->conn_list); - sock_hold(msk->first); - if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); - - mptcp_rcv_space_init(msk, msk->first); - mptcp_propagate_sndbuf(newsk, msk->first); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. 
*/ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ec54413fb31f..0b7e7a6606f5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -749,6 +749,7 @@ static struct sock *subflow_syn_recv_sock(const struct = sock *sk, struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; struct sock *new_msk =3D NULL; + struct mptcp_sock *owner; struct sock *child; =20 pr_debug("listener=3D%p, req=3D%p, conn=3D%p", listener, req, listener->c= onn); @@ -823,6 +824,8 @@ static struct sock *subflow_syn_recv_sock(const struct = sock *sk, ctx->setsockopt_seq =3D listener->setsockopt_seq; =20 if (ctx->mp_capable) { + owner =3D mptcp_sk(new_msk); + /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ @@ -831,14 +834,14 @@ static struct sock *subflow_syn_recv_sock(const struc= t sock *sk, /* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list */ - WRITE_ONCE(mptcp_sk(new_msk)->first, child); + WRITE_ONCE(owner->first, child); =20 /* new mpc subflow takes ownership of the newly * created mptcp socket */ mptcp_sk(new_msk)->setsockopt_seq =3D ctx->setsockopt_seq; - mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); - mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); + mptcp_pm_new_connection(owner, child, 1); + mptcp_token_accept(subflow_req, owner); ctx->conn =3D new_msk; new_msk =3D NULL; =20 @@ -846,15 +849,21 @@ static struct sock *subflow_syn_recv_sock(const struc= t sock *sk, * uses the correct data */ mptcp_copy_inaddrs(ctx->conn, child); + mptcp_propagate_sndbuf(ctx->conn, child); + + mptcp_rcv_space_init(owner, child); + list_add(&ctx->node, &owner->conn_list); + sock_hold(child); =20 /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); + mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + 
ctx->pm_notified =3D 1; + } } else if (ctx->mp_join) { - struct mptcp_sock *owner; - owner =3D subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); @@ -1836,9 +1845,17 @@ void mptcp_subflow_queue_clean(struct sock *listener= _sk, struct sock *listener_s sock_hold(sk); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next =3D msk->dl_next; - msk->first =3D NULL; msk->dl_next =3D NULL; =20 + /* The upcoming mptcp_close is going to drop all the references + * to the first subflow, ignoring that one of such reference is + * owned by the request socket still in the accept queue and that + * later inet_csk_listen_stop will drop it. + * Acquire an extra reference here to avoid an UaF at that point. + */ + if (msk->first) + sock_hold(msk->first); + do_cancel_work =3D __mptcp_close(sk, 0); release_sock(sk); if (do_cancel_work) { --=20 2.38.1 From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6FF46AD4A for ; Fri, 13 Jan 2023 18:23:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1673634210; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=W9OgPz1q3u28kuyyhgSILyfjwOGipZa7LJr2kB8AlHA=; b=MH+PCyJ/m7QTSGOm+Wc1gqQa6HGtSeSrqLsOoNrOPCZNGWVw9Q/Q/kdCOjb6N1giaTRz3v G7yzq5DsgvQpJQA7AzvSL/o8xqtHrhammP3mNufxtC55rIuYwOnoOEnV+w2LGZ+f95RVw0 NPBRWhVvshTxz+QSfeiotL5n+p7kAiU= Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 
us-mta-462-3zAOGbQqP2qw116bWdAPLQ-1; Fri, 13 Jan 2023 13:21:45 -0500 X-MC-Unique: 3zAOGbQqP2qw116bWdAPLQ-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 66CCD380673D for ; Fri, 13 Jan 2023 18:21:07 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id E2DD34078903 for ; Fri, 13 Jan 2023 18:21:06 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 2/9] mptcp: drop unneeded argument Date: Fri, 13 Jan 2023 19:20:49 +0100 Message-Id: <964fa27c1f71f563b2cc395c38627f62c3bfa116.1673629755.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" After the previous commit every mptcp_pm_fully_established() is always invoked with a GFP_ATOMIC argument. We can drop it. 
Signed-off-by: Paolo Abeni --- net/mptcp/options.c | 2 +- net/mptcp/pm.c | 4 ++-- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b30cea2fbf3f..99c4f9e9bb90 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1001,7 +1001,7 @@ static bool check_fully_established(struct mptcp_sock= *msk, struct sock *ssk, clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk); } else { - mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); + mptcp_pm_fully_established(msk, ssk); } return true; =20 diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 8e0cf6275e94..4ed4d29d9c11 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -126,7 +126,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *m= sk, return true; } =20 -void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock = *ssk, gfp_t gfp) +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock = *ssk) { struct mptcp_pm_data *pm =3D &msk->pm; bool announce =3D false; @@ -150,7 +150,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk,= const struct sock *ssk, spin_unlock_bh(&pm->lock); =20 if (announce) - mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); + mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC); } =20 void mptcp_pm_connection_closed(struct mptcp_sock *msk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 871ec3e93314..5f1a30959b5c 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -799,7 +799,7 @@ bool mptcp_pm_addr_families_match(const struct sock *sk, void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock = *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct so= ck *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ss= k, int server_side); -void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock = *ssk, 
gfp_t gfp); +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock = *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 0b7e7a6606f5..769ac8b7900d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -860,7 +860,7 @@ static struct sock *subflow_syn_recv_sock(const struct = sock *sk, */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); - mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + mptcp_pm_fully_established(owner, child); ctx->pm_notified =3D 1; } } else if (ctx->mp_join) { --=20 2.38.1 From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E7FC1AD54 for ; Fri, 13 Jan 2023 18:21:10 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1673634069; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=bjPkWWNE9azhq5ojepZFEiOca5tuHazJAF5vXdx5JrE=; b=Zatgc6QlxpGyL21piARM4sibgcoxTzLo1gmGl3Aa++kWZpf9nY2ZhrrNHI5GqYXHxmcZbx VhQKuoOai6XVIR43HUqP3DMveNlcj73Te88bVQWQhAzmAgcK1CLI4j1RfTRUVlnkiy4y9c jPC+Syr42/9wrvJvlcggD1TA9mLg5II= Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-312-DXfj28aNNuicg3R5hx83gQ-1; Fri, 13 Jan 2023 13:21:08 -0500 X-MC-Unique: DXfj28aNNuicg3R5hx83gQ-1 Received: from 
smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 34A1E1C00423 for ; Fri, 13 Jan 2023 18:21:08 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id B01274078903 for ; Fri, 13 Jan 2023 18:21:07 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 3/9] mptcp: drop legacy code. Date: Fri, 13 Jan 2023 19:20:50 +0100 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" After the previous commits the PM worker can't race anymore with the unaccepted subflow close and disposal, as the msk keeps a reference to such subflow. We can remove the now irrelevant and confusing checks explicitly preventing the mentioned race. 
Signed-off-by: Paolo Abeni --- net/mptcp/options.c | 7 +------ net/mptcp/subflow.c | 7 +++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 99c4f9e9bb90..91d5b59540e9 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -988,12 +988,7 @@ static bool check_fully_established(struct mptcp_sock = *msk, struct sock *ssk, mptcp_subflow_fully_established(subflow, mp_opt); =20 check_notify: - /* if the subflow is not already linked into the conn_list, we can't - * notify the PM: this subflow is still on the listener queue - * and the PM possibly acquiring the subflow lock could race with - * the listener close - */ - if (likely(subflow->pm_notified) || list_empty(&subflow->node)) + if (likely(subflow->pm_notified)) return true; =20 subflow->pm_notified =3D 1; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 769ac8b7900d..d3a17f924377 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1933,11 +1933,10 @@ static void subflow_ulp_release(struct sock *ssk) =20 sk =3D ctx->conn; if (sk) { - /* if the msk has been orphaned, keep the ctx - * alive, will be freed by __mptcp_close_ssk(), - * when the subflow is still unaccepted + /* if the subflow has been closed by the TCP stack, keep + * the ctx alive, will be freed by __mptcp_close_ssk() */ - release =3D ctx->disposable || list_empty(&ctx->node); + release =3D ctx->disposable; sock_put(sk); } =20 --=20 2.38.1 From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C0E9FAD4A for ; Fri, 13 Jan 2023 18:21:11 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1673634070; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: 
to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=Urf3hoMTp5aUZpkqV/sx089kflZIxdgw8J3T7wTpJck=; b=gdQbqw8dhtK4PRMT+h9SGAAtU0m09cckblJzbvJde/2wJon/ppu2FVYTBCv2K7u82WJ/DS DXCF1WbyNV3WltNlbQ02Oeo67Bpi03vcCzSBrt1qcgEuLni3QRB9YtNYVjrt27xMUI+mes huzb39Rk7U8hswP4/E96vbvLsxjaeEI= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-126-IGtC_MkyPMqWUue-L2E6DA-1; Fri, 13 Jan 2023 13:21:09 -0500 X-MC-Unique: IGtC_MkyPMqWUue-L2E6DA-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 06376858F0E for ; Fri, 13 Jan 2023 18:21:09 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id 818374078903 for ; Fri, 13 Jan 2023 18:21:08 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 4/9] mptcp: avoid unneeded __mptcp_nmpc_socket() usage Date: Fri, 13 Jan 2023 19:20:51 +0100 Message-Id: <67ebfe6c67672d72728d02bb92503843aaae826e.1673629755.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" In a few spots the mptcp code invokes the __mptcp_nmpc_socket() helper multiple times under the same socket lock scope. Additionally, in such places, the socket status ensures that there is no MP capable handshake running. 
Under the above condition we can replace the later __mptcp_nmpc_socket() helper invocation with direct access to the msk->subflow pointer and better document such access is not supposed to fail with WARN(). Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d298d629b3b2..fc13f1e45137 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3172,7 +3172,7 @@ static struct sock *mptcp_accept(struct sock *sk, int= flags, int *err, struct socket *listener; struct sock *newsk; =20 - listener =3D __mptcp_nmpc_socket(msk); + listener =3D msk->subflow; if (WARN_ON_ONCE(!listener)) { *err =3D -EINVAL; return NULL; @@ -3392,7 +3392,7 @@ static int mptcp_get_port(struct sock *sk, unsigned s= hort snum) struct mptcp_sock *msk =3D mptcp_sk(sk); struct socket *ssock; =20 - ssock =3D __mptcp_nmpc_socket(msk); + ssock =3D msk->subflow; pr_debug("msk=3D%p, subflow=3D%p", msk, ssock); if (WARN_ON_ONCE(!ssock)) return -EINVAL; @@ -3738,7 +3738,10 @@ static int mptcp_stream_accept(struct socket *sock, = struct socket *newsock, =20 pr_debug("msk=3D%p", msk); =20 - ssock =3D __mptcp_nmpc_socket(msk); + /* buggy applications can call accept on socket states other then LISTEN + * but no need to allocate the first subflow just to error out. 
+ */ + ssock =3D msk->subflow; if (!ssock) return -EINVAL; =20 --=20 2.38.1 From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id F408BAD55 for ; Fri, 13 Jan 2023 18:21:12 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1673634071; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=wK4RE43VMqB6iWd0pnqnnfgFaVYAWsLn0ak8bm2GhhU=; b=TGo+SxYiuRAlFq9FD3wqc6bRTzmF2LEoYhS0+AMIrwtGalM3o/79VBq7oYwEC/f5TKOq6u CMnZ4i7BZ1q1HKY8VUI+4V20+hOcKzHqDkzbQxLoCjCG7X0kP0UV/El+UpeorvCUz8YvLH SQ1COmlXdA0MkvwDRBidS95/ZzraOTY= Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-518-P1KTS1gWN9u6jPuFBcFIBg-1; Fri, 13 Jan 2023 13:21:10 -0500 X-MC-Unique: P1KTS1gWN9u6jPuFBcFIBg-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id C736E3C1486B for ; Fri, 13 Jan 2023 18:21:09 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id 4BD5D4078903 for ; Fri, 13 Jan 2023 18:21:09 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 5/9] mptcp: move fastopen subflow check inside mptcp_sendmsg_fastopen() Date: Fri, 13 Jan 2023 19:20:52 +0100 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: 
mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" So that we can avoid a bunch of checks in the fast path. Additionally we can specialize such checks according to the specific fastopen method - defer_connect vs MSG_FASTOPEN. The latter bits will simplify the next patches. Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index fc13f1e45137..9c4c729bf271 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1694,13 +1694,27 @@ static void mptcp_set_nospace(struct sock *sk) =20 static int mptcp_disconnect(struct sock *sk, int flags); =20 -static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struc= t msghdr *msg, +static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, size_t len, int *copied_syn) { unsigned int saved_flags =3D msg->msg_flags; struct mptcp_sock *msk =3D mptcp_sk(sk); + struct sock *ssk; int ret; =20 + /* on flags based fastopen the mptcp is supposed to create the + * first subflow right now. Otherwise we are in the defer_connect + * path, and the first subflow must be already present. + * Since the defer_connect flag is cleared after the first succsful + * fastopen attempt, no need to check for additional subflow status. 
+ */ + if (msg->msg_flags & MSG_FASTOPEN && !__mptcp_nmpc_socket(msk)) + return -EINVAL; + if (!msk->first) + return -EINVAL; + + ssk =3D msk->first; + lock_sock(ssk); msg->msg_flags |=3D MSG_DONTWAIT; msk->connect_flags =3D O_NONBLOCK; @@ -1723,6 +1737,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, st= ruct sock *ssk, struct msgh } else if (ret && ret !=3D -EINPROGRESS) { mptcp_disconnect(sk, 0); } + inet_sk(sk)->defer_connect =3D 0; =20 return ret; } @@ -1731,7 +1746,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msgh= dr *msg, size_t len) { struct mptcp_sock *msk =3D mptcp_sk(sk); struct page_frag *pfrag; - struct socket *ssock; size_t copied =3D 0; int ret =3D 0; long timeo; @@ -1741,12 +1755,10 @@ static int mptcp_sendmsg(struct sock *sk, struct ms= ghdr *msg, size_t len) =20 lock_sock(sk); =20 - ssock =3D __mptcp_nmpc_socket(msk); - if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect || - msg->msg_flags & MSG_FASTOPEN))) { + if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)= ) { int copied_syn =3D 0; =20 - ret =3D mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); + ret =3D mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn); copied +=3D copied_syn; if (ret =3D=3D -EINPROGRESS && copied_syn > 0) goto out; --=20 2.38.1 From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A7FD9AD57 for ; Fri, 13 Jan 2023 18:21:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1673634072; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; 
bh=3/A23NdEPHns/1STQ1NGXsZjNNvgck+PnG52iuic8Lo=; b=AePMubd7WlnGoqTxuOE28UTzuyz7N8ciFCIE+Xw/cmCIQGmD+Kq/PESMt6U6ZpzORQGoQG RVA2O+0bAJ9wfgo0NHNuJheRie1bkbArqQpOV35Y1M1vC5VGXEC2u++9VHudkrxeDqSNZJ pTtccfIJTnXdtaNUiXEMQ9PSVJuVhOo= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-261-bBlWxPJ7ObycsM6gfXheGA-1; Fri, 13 Jan 2023 13:21:11 -0500 X-MC-Unique: bBlWxPJ7ObycsM6gfXheGA-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 938D685C1B7 for ; Fri, 13 Jan 2023 18:21:10 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id 1D1194078903 for ; Fri, 13 Jan 2023 18:21:09 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 6/9] mptcp: move first subflow allocation at mpc access time Date: Fri, 13 Jan 2023 19:20:53 +0100 Message-Id: <241299ba21925d0dff397d2e84b66a3280e213dc.1673629755.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" In the long run this will simplify the mptcp code and will allow for more consistent behavior. Move the first subflow allocation out of the sock->init ops into the __mptcp_nmpc_socket() helper. Since the first subflow creation can now happen after the first setsockopt() we additionally need to invoke mptcp_sockopt_sync() on it. 
Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 4 +-- net/mptcp/protocol.c | 57 +++++++++++++++++++++++++++--------------- net/mptcp/protocol.h | 2 +- net/mptcp/sockopt.c | 18 ++++++------- 4 files changed, 49 insertions(+), 32 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index e82a112b8779..290f88fffe5f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1014,8 +1014,8 @@ static int mptcp_pm_nl_create_listen_socket(struct so= ck *sk, return -EINVAL; =20 ssock =3D __mptcp_nmpc_socket(msk); - if (!ssock) - return -EINVAL; + if (IS_ERR(ssock)) + return PTR_ERR(ssock); =20 mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); #if IS_ENABLED(CONFIG_MPTCP_IPV6) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9c4c729bf271..1b93624db62d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -49,18 +49,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; =20 -/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has = not - * completed yet or has failed, return the subflow socket. - * Otherwise return NULL. - */ -struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) -{ - if (!msk->subflow || READ_ONCE(msk->can_ack)) - return NULL; - - return msk->subflow; -} - /* Returns end sequence number of the receiver's advertised window */ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) { @@ -116,6 +104,31 @@ static int __mptcp_socket_create(struct mptcp_sock *ms= k) return 0; } =20 +/* Returns the first subflow socket if available and the MPC + * handshake is not started yet. 
+ */ +struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk) +{ + struct sock *sk =3D (struct sock *)msk; + int ret; + + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + return ERR_PTR(-EINVAL); + + if (!msk->subflow) { + if (msk->first) + return ERR_PTR(-EINVAL); + + ret =3D __mptcp_socket_create(msk); + if (ret) + return ERR_PTR(ret); + + mptcp_sockopt_sync(msk, msk->first); + } + + return msk->subflow; +} + static void mptcp_drop(struct sock *sk, struct sk_buff *skb) { sk_drops_add(sk, skb); @@ -1699,6 +1712,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, st= ruct msghdr *msg, { unsigned int saved_flags =3D msg->msg_flags; struct mptcp_sock *msk =3D mptcp_sk(sk); + struct socket *ssock; struct sock *ssk; int ret; =20 @@ -1708,8 +1722,11 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, s= truct msghdr *msg, * Since the defer_connect flag is cleared after the first succsful * fastopen attempt, no need to check for additional subflow status. */ - if (msg->msg_flags & MSG_FASTOPEN && !__mptcp_nmpc_socket(msk)) - return -EINVAL; + if (msg->msg_flags & MSG_FASTOPEN) { + ssock =3D __mptcp_nmpc_socket(msk); + if (IS_ERR(ssock)) + return PTR_ERR(ssock); + } if (!msk->first) return -EINVAL; =20 @@ -3592,8 +3609,8 @@ static int mptcp_connect(struct sock *sk, struct sock= addr *uaddr, int addr_len) int err =3D -EINVAL; =20 ssock =3D __mptcp_nmpc_socket(msk); - if (!ssock) - return -EINVAL; + if (IS_ERR(ssock)) + return PTR_ERR(ssock); =20 mptcp_token_destroy(msk); inet_sk_state_store(sk, TCP_SYN_SENT); @@ -3681,8 +3698,8 @@ static int mptcp_bind(struct socket *sock, struct soc= kaddr *uaddr, int addr_len) =20 lock_sock(sock->sk); ssock =3D __mptcp_nmpc_socket(msk); - if (!ssock) { - err =3D -EINVAL; + if (IS_ERR(ssock)) { + err =3D PTR_ERR(ssock); goto unlock; } =20 @@ -3718,8 +3735,8 @@ static int mptcp_listen(struct socket *sock, int back= log) =20 lock_sock(sk); ssock =3D __mptcp_nmpc_socket(msk); - if (!ssock) { - err =3D -EINVAL; + if 
(IS_ERR(ssock)) { + err =3D PTR_ERR(ssock); goto unlock; } =20 diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5f1a30959b5c..3a055438c65e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -632,7 +632,7 @@ void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); -struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); +struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 9986681aaf40..022a6cad00c1 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -301,9 +301,9 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_soc= k *msk, int optname, case SO_BINDTOIFINDEX: lock_sock(sk); ssock =3D __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } =20 ret =3D sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); @@ -396,9 +396,9 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, = int optname, case IPV6_FREEBIND: lock_sock(sk); ssock =3D __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } =20 ret =3D tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); @@ -693,9 +693,9 @@ static int mptcp_setsockopt_sol_ip_set_transparent(stru= ct mptcp_sock *msk, int o lock_sock(sk); =20 ssock =3D __mptcp_nmpc_socket(msk); - if (!ssock) { + if (IS_ERR(ssock)) { release_sock(sk); - return -EINVAL; + return PTR_ERR(ssock); } =20 issk =3D inet_sk(ssock->sk); @@ -764,8 +764,8 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_= sock *msk, int level, int =20 /* Limit to first subflow, 
before the connection establishment */ sock =3D __mptcp_nmpc_socket(msk); - if (!sock) - return -EINVAL; + if (IS_ERR(sock)) + return PTR_ERR(sock); =20 return tcp_setsockopt(sock->sk, level, optname, optval, optlen); } @@ -865,7 +865,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_= sock *msk, int level, int } =20 ssock =3D __mptcp_nmpc_socket(msk); - if (!ssock) + if (IS_ERR(ssock)) goto out; =20 ret =3D tcp_getsockopt(ssock->sk, level, optname, optval, optlen); --=20 2.38.1 From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A5FD8AD54 for ; Fri, 13 Jan 2023 18:21:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1673634072; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=t2szHhoL05307008MozVzBw9QdSsIq4UyYSwxBlQfqo=; b=HhePZPTxcxadb7IhE1dvFPcIbSG0hvxz1XLMe8mmgrbckDnj91NU8pXAoY3vKHdiOdvmB7 ymEkiim4CGGZer5N3iblEQ+LZIm8cslOopyHH9m7uvQpmI0Qb/gVFjp8B960T+jt2XiM7c f1RPMakJNc1NFJ2yfAoTsH5rZ2h4XJo= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-211-QpxhwlSEOVK_Z6pVhRRRnQ-1; Fri, 13 Jan 2023 13:21:11 -0500 X-MC-Unique: QpxhwlSEOVK_Z6pVhRRRnQ-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 5C04A85C1A0 for ; Fri, 13 Jan 2023 18:21:11 +0000 
(UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id DEB7C4078903 for ; Fri, 13 Jan 2023 18:21:10 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 7/9] mptcp: do not keep around the first subflow after disconnect. Date: Fri, 13 Jan 2023 19:20:54 +0100 Message-Id: <64723f701e27c7f44d96b7a28a25857e3a93c942.1673629755.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" After the previous patch the first subflow is allocated as needed at bind, connect, listen time. We no longer need to keep the first subflow alive after a disconnect just to be able to perform such syscalls. Overall, this change makes the passive and active sockets consistent: even passive sockets will be allowed to complete their life cycle after disconnect.
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/290 Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1b93624db62d..9d440746704d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2371,11 +2371,9 @@ static void __mptcp_close_ssk(struct sock *sk, struc= t sock *ssk, unsigned int flags) { struct mptcp_sock *msk =3D mptcp_sk(sk); - bool need_push, dispose_it; + bool need_push; =20 - dispose_it =3D !msk->subflow || ssk !=3D msk->subflow->sk; - if (dispose_it) - list_del(&subflow->node); + list_del(&subflow->node); =20 lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); =20 @@ -2389,15 +2387,6 @@ static void __mptcp_close_ssk(struct sock *sk, struc= t sock *ssk, } =20 need_push =3D (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(= sk); - if (!dispose_it) { - tcp_disconnect(ssk, 0); - msk->subflow->state =3D SS_UNCONNECTED; - mptcp_subflow_ctx_reset(subflow); - release_sock(ssk); - - goto out; - } - sock_orphan(ssk); subflow->disposable =3D 1; =20 @@ -2424,10 +2413,11 @@ static void __mptcp_close_ssk(struct sock *sk, stru= ct sock *ssk, =20 sock_put(ssk); =20 - if (ssk =3D=3D msk->first) + if (ssk =3D=3D msk->first) { msk->first =3D NULL; + mptcp_dispose_initial_subflow(msk); + } =20 -out: if (ssk =3D=3D msk->last_snd) msk->last_snd =3D NULL; =20 @@ -3274,10 +3264,6 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk =3D mptcp_sk(sk); =20 - /* clears msk->subflow, allowing the following to close - * even the initial subflow - */ - mptcp_dispose_initial_subflow(msk); mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } --=20 2.38.1 From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) 
by smtp.subspace.kernel.org (Postfix) with ESMTPS id 16B0FAD55 for ; Fri, 13 Jan 2023 18:21:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1673634074; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=ggf3M+W8TT1rfmXGh2ofEladlxwPVJxhQzKMOuC5Tio=; b=Axiu1qqywq54lNFho6kT0NvSL5iPJ1FDczNMfxIjGDiSDiUiduhcxC6J0j1eXfBRgZtDnI 1PoJXwHy9aRMa3bQiSpDl9BK2oVS/jB/uavQsBsABPzE+ey9i7RFNfMLg5TfpR7AIbFSOp 4h0BL/8mldi1gyw+79X6pl5Y96KSsag= Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-486-3lZwcFFiORqI_VRQzw4vjw-1; Fri, 13 Jan 2023 13:21:12 -0500 X-MC-Unique: 3lZwcFFiORqI_VRQzw4vjw-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 264D61C0041B for ; Fri, 13 Jan 2023 18:21:12 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id A4C9B4078903 for ; Fri, 13 Jan 2023 18:21:11 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 8/9] mptcp: fastclose msk when cleaning unaccepted sockets Date: Fri, 13 Jan 2023 19:20:55 +0100 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" When cleaning up unaccepted mptcp socket 
still laying inside the listener queue at listener close time, such sockets will go through a regular close, waiting for a timeout before shutting down the subflows. There is no need to keep the kernel resources in use for such a possibly long time: short-circuit to fast-close. Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 7 +++++-- net/mptcp/subflow.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9d440746704d..e5d1d2747e31 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2981,10 +2981,13 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } =20 - if (mptcp_check_readable(msk)) { - /* the msk has read data, do the MPTCP equivalent of TCP reset */ + if (mptcp_check_readable(msk) || timeout < 0) { + /* If the msk has read data, or the caller explicitly ask it, + * do the MPTCP equivalent of TCP reset, aka MTCP fastclose + */ inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); + timeout =3D 0; } else if (mptcp_close_state(sk)) { __mptcp_wr_shutdown(sk); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d3a17f924377..b4d693089476 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1856,7 +1856,7 @@ void mptcp_subflow_queue_clean(struct sock *listener_= sk, struct sock *listener_s if (msk->first) sock_hold(msk->first); =20 - do_cancel_work =3D __mptcp_close(sk, 0); + do_cancel_work =3D __mptcp_close(sk, -1); release_sock(sk); if (do_cancel_work) { /* lockdep will report a false positive ABBA deadlock --=20 2.38.1 From nobody Tue Apr 16 13:20:12 2024 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6836DAD56 for ; Fri, 13 Jan 2023 18:21:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; 
s=mimecast20190719; t=1673634074; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=kM8PELoRbqwEgpC+2DjJUcI9v8FtOva6p7FYrOfa3oA=; b=UuVd2BkXxmopLFe5dli5DcftoyKc7Rhgl5O82n43Gkd73oWGeHgvLTmgUNnujvAoAccEWx qm2VWtSz0IIer90eTYuanlcbmi6SsuFrWQpq5ZcQYeABJNcKJ22A8NA1TMRV2Yw9ZDiM9D GHk8b2Z/BbH2RFQAZ65tXrSm4VeUOw0= Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-634-JP9Ds1D_N36z4jPzlP-0kQ-1; Fri, 13 Jan 2023 13:21:13 -0500 X-MC-Unique: JP9Ds1D_N36z4jPzlP-0kQ-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id E0F0B1C0513B for ; Fri, 13 Jan 2023 18:21:12 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.194.89]) by smtp.corp.redhat.com (Postfix) with ESMTP id 6DBB24078904 for ; Fri, 13 Jan 2023 18:21:12 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Subject: [PATCH mptcp-next 9/9] mptcp: refactor mptcp_stream_accept() Date: Fri, 13 Jan 2023 19:20:56 +0100 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.2 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" Rewrite the mptcp socket accept op, partially open-coding inet_accept(), instead of indirectly calling it. This way we can avoid acquiring the new socket lock twice and we can avoid a couple of indirect calls.
Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e5d1d2747e31..cfc112f73d6e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3752,6 +3752,7 @@ static int mptcp_stream_accept(struct socket *sock, s= truct socket *newsock, { struct mptcp_sock *msk =3D mptcp_sk(sock->sk); struct socket *ssock; + struct sock *newsk; int err; =20 pr_debug("msk=3D%p", msk); @@ -3763,16 +3764,26 @@ static int mptcp_stream_accept(struct socket *sock,= struct socket *newsock, if (!ssock) return -EINVAL; =20 - err =3D ssock->ops->accept(sock, newsock, flags, kern); - if (err =3D=3D 0 && !mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk =3D mptcp_sk(newsock->sk); + newsk =3D mptcp_accept(sock->sk, flags, &err, kern); + if (!newsk) + return err; + + lock_sock(newsk); + + sock_rps_record_flow(newsk); + WARN_ON(!((1 << newsk->sk_state) & + (TCPF_ESTABLISHED | TCPF_SYN_RECV | + TCPF_CLOSE_WAIT | TCPF_CLOSE))); + + sock_graft(newsk, newsock); + + newsock->state =3D SS_CONNECTED; + if (!mptcp_is_tcpsk(newsock->sk)) { + struct mptcp_sock *msk =3D mptcp_sk(newsk); struct mptcp_subflow_context *subflow; - struct sock *newsk =3D newsock->sk; =20 set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); =20 - lock_sock(newsk); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ @@ -3782,10 +3793,10 @@ static int mptcp_stream_accept(struct socket *sock,= struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } - release_sock(newsk); } + release_sock(newsk); =20 - return err; + return 0; } =20 static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) --=20 2.38.1