From nobody Mon Dec 15 11:54:17 2025 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3E59C182B4 for ; Wed, 24 May 2023 13:50:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684936222; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=1eVJzRfpuRDooyTPMWRikPWwGwaEqCH2TqU1KiUpAPA=; b=iqb923u3CL96+F0vyoRTrK19+yw8AYXJZg8pCMJm56sml9zZI9JaBH12zUsRqW11rDleCx hsKsC77dUUCD8mrCahmDfKkobJ8zN4e0H4+Dw1wVGucIIs0lMU5F99cIR0FXWct4AZj3B3 Eca+wRHB5CesbBoilDGPdtzDs5O8KE4= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-86-m_R21tkFNDGLY103a5QrEA-1; Wed, 24 May 2023 09:50:19 -0400 X-MC-Unique: m_R21tkFNDGLY103a5QrEA-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id C658785A5BB; Wed, 24 May 2023 13:50:18 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.138]) by smtp.corp.redhat.com (Postfix) with ESMTP id 313AF7AE4; Wed, 24 May 2023 13:50:18 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Florian Westphal Subject: [PATCH v4 mptcp-next 1/6] mptcp: add subflow unique id Date: Wed, 24 May 2023 15:50:06 +0200 Message-Id: <1c34f66818566a46418b5de33e0a32f572604615.1684935727.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: 
mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" User space needs to properly account for the data received/sent by individual subflows. When additional subflows are created and/or closed during the MPTCP socket lifetime, the information currently exposed via MPTCP_TCPINFO is not enough: subflows are identified only by their sequential position inside the info dumps, and that will change with the above-mentioned events. To solve the above problem, this patch introduces a new subflow identifier that is unique inside the given mptcp socket scope. The initial subflow gets the id 1 and the other subflows get incremental values at join time. Signed-off-by: Paolo Abeni --- v2 -> v3: - fix msk subflow_id init (Matttbe) v1 -> v2: - properly set subflow_id for the first passive subflow and active subflow= s, too - drop the tcpi_fackets overload --- net/mptcp/protocol.c | 6 ++++++ net/mptcp/protocol.h | 5 ++++- net/mptcp/subflow.c | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 28da6a9fe8fd..9998b2dd150e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); subflow->request_mptcp =3D 1; + subflow->subflow_id =3D msk->subflow_id++; =20 /* This is the first subflow, always with id 0 */ subflow->local_id_valid =3D 1; @@ -845,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk,= struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); =20 + mptcp_subflow_ctx(ssk)->subflow_id =3D msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); return true; @@ 
-2775,6 +2777,7 @@ static int __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); WRITE_ONCE(msk->allow_infinite_fallback, true); msk->recovery =3D false; + msk->subflow_id =3D 1; =20 mptcp_pm_data_init(msk); =20 @@ -3206,6 +3209,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *s= k, msk->setsockopt_seq =3D mptcp_sk(sk)->setsockopt_seq; mptcp_init_sched(msk, mptcp_sk(sk)->sched); =20 + /* passive msk is created after the first/MPC subflow */ + msk->subflow_id =3D 2; + sock_reset_flag(nsk, SOCK_RCU_FREE); security_inet_csk_clone(nsk, req); =20 diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index de94c01746dc..f9180ecce5e4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -319,7 +319,8 @@ struct mptcp_sock { u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; =20 - u32 setsockopt_seq; + u32 subflow_id; + u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; struct mptcp_sock *dl_next; }; @@ -501,6 +502,8 @@ struct mptcp_subflow_context { u8 reset_reason:4; u8 stale_count; =20 + u32 subflow_id; + long delegated_status; unsigned long fail_tout; =20 diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 63ac4dc621d4..c7001a23550a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -819,6 +819,7 @@ static struct sock *subflow_syn_recv_sock(const struct = sock *sk, if (!ctx->conn) goto fallback; =20 + ctx->subflow_id =3D 1; owner =3D mptcp_sk(ctx->conn); mptcp_pm_new_connection(owner, child, 1); =20 @@ -1574,6 +1575,7 @@ int __mptcp_subflow_connect(struct sock *sk, const st= ruct mptcp_addr_info *loc, subflow->remote_id =3D remote_id; subflow->request_join =3D 1; subflow->request_bkup =3D !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); + subflow->subflow_id =3D msk->subflow_id++; mptcp_info2sockaddr(remote, &addr, ssk->sk_family); =20 sock_hold(ssk); --=20 2.40.1 From nobody Mon Dec 15 11:54:17 2025 Received: from us-smtp-delivery-124.mimecast.com 
(us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E9EAE1FDD for ; Wed, 24 May 2023 13:50:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684936222; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=lj+tBXYvbndjJD/pWpIbeW3GxYgMLW4dQEortLPRZXw=; b=eA07e6ePqpnA4t29iTVnZUVEV+Q8fegR1QvRDRaFqczFTNXTAbkIFRV4+tJIbPmvPD8UlM jLkcqJ+xjX73owCayIcZe6+j4PhmgocDsyG6mergdRgIULBvGCO/BcU1jZKgggadDS+dUx F/Pcuzx9RTIUJhxt1oS0b3aQOQHVfLA= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-3-M2j8U5OkNLWGFyNyxgDe-g-1; Wed, 24 May 2023 09:50:20 -0400 X-MC-Unique: M2j8U5OkNLWGFyNyxgDe-g-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id ABE23101A593; Wed, 24 May 2023 13:50:19 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.138]) by smtp.corp.redhat.com (Postfix) with ESMTP id 16E797AE4; Wed, 24 May 2023 13:50:18 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Florian Westphal Subject: [PATCH v4 mptcp-next 2/6] mptcp: introduce MPTCP_FULL_INFO getsockopt Date: Wed, 24 May 2023 15:50:07 +0200 Message-Id: <72ebf53600698f09b96900f8b3fbd2d4c1261b6c.1684935727.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 
X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" Some user-space applications want to monitor subflow utilization. Dumping the per-subflow tcp_info is not enough, as the PM could close and re-create the subflows under the hood, fooling the accounting. Even checking the src/dst addresses used by each subflow may not be enough, because new subflows could re-use the same address/port as the just-closed one. This patch introduces a new socket option that allows dumping all the relevant information all at once (everything, everywhere...), in a consistent manner. To reuse the existing helper to manipulate the new struct, keep the binary layout of the initial few fields the same as mptcp_subflow_data. Signed-off-by: Paolo Abeni --- v3 -> v4: - full_info struct re-design (Florian) v2 -> v3: - added missing changelog (oops) --- include/uapi/linux/mptcp.h | 25 ++++++ net/mptcp/sockopt.c | 165 +++++++++++++++++++++++++++++++++---- 2 files changed, 174 insertions(+), 16 deletions(-) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 32af2d278cb4..37c46cf05795 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -12,6 +12,7 @@ #include /* for sockaddr_in */ #include /* for sockaddr_in6 */ #include /* for sockaddr_storage and sa_family */ +#include /* for tcp_info */ =20 #define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) #define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) @@ -244,9 +245,33 @@ struct mptcp_subflow_addrs { }; }; =20 +struct mptcp_subflow_info { + __u32 id; + struct mptcp_subflow_addrs addrs; +}; + +struct mptcp_subflow_full_info { + __u32 size_subflow_full_info; /* size of this structure in userspace */ + __u32 num_subflows_kern; /* must be 0, set by kernel (real subflow count= ) */ + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ + __u32 size_tcpinfo_user; + 
__u32 size_sfinfo_kernel; /* must be 0, set by kernel */ + __u32 size_sfinfo_user; + __u32 num_subflows_user; /* max subflows that userspace is interested in; + * the buffers at subflow_info_addr/tcp_info_addr + * are respectively at least: + * num_subflows_user * size_sfinfo_user + * num_subflows_user * size_tcpinfo_user + * bytes wide + */ + __aligned_u64 subflow_info_addr; + __aligned_u64 tcp_info_addr; +} __attribute__((aligned(8))); + /* MPTCP socket options */ #define MPTCP_INFO 1 #define MPTCP_TCPINFO 2 #define MPTCP_SUBFLOW_ADDRS 3 +#define MPTCP_FULL_INFO 4 =20 #endif /* _UAPI_MPTCP_H */ diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index d4258869ac48..59a174ee1d54 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -14,7 +14,8 @@ #include #include "protocol.h" =20 -#define MIN_INFO_OPTLEN_SIZE 16 +#define MIN_INFO_OPTLEN_SIZE 16 +#define MIN_FULL_INFO_OPTLEN_SIZE 48 =20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { @@ -943,12 +944,13 @@ static int mptcp_getsockopt_info(struct mptcp_sock *m= sk, char __user *optval, in return 0; } =20 -static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, - char __user *optval, - u32 copied, - int __user *optlen) +static int __mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, + int size_subflow_data_kern, + char __user *optval, + u32 copied, + int __user *optlen) { - u32 copylen =3D min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); + u32 copylen =3D min_t(u32, sfd->size_subflow_data, size_subflow_data_kern= ); =20 if (copied) copied +=3D sfd->size_subflow_data; @@ -964,25 +966,30 @@ static int mptcp_put_subflow_data(struct mptcp_subflo= w_data *sfd, return 0; } =20 -static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, - char __user *optval, int __user *optlen) +static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, + char __user *optval, + u32 copied, + int __user *optlen) +{ + return __mptcp_put_subflow_data(sfd, sizeof(*sfd), optval, copied, 
optlen= ); +} + +static int __mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, + int min_info_optlen_size, + char __user *optval, + int __user *optlen) { int len, copylen; =20 if (get_user(len, optlen)) return -EFAULT; =20 - /* if mptcp_subflow_data size is changed, need to adjust - * this function to deal with programs using old version. - */ - BUILD_BUG_ON(sizeof(*sfd) !=3D MIN_INFO_OPTLEN_SIZE); - - if (len < MIN_INFO_OPTLEN_SIZE) + if (len < min_info_optlen_size) return -EINVAL; =20 memset(sfd, 0, sizeof(*sfd)); =20 - copylen =3D min_t(unsigned int, len, sizeof(*sfd)); + copylen =3D min_t(unsigned int, len, min_info_optlen_size); if (copy_from_user(sfd, optval, copylen)) return -EFAULT; =20 @@ -991,7 +998,7 @@ static int mptcp_get_subflow_data(struct mptcp_subflow_= data *sfd, sfd->size_user > INT_MAX) return -EINVAL; =20 - if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || + if (sfd->size_subflow_data < min_info_optlen_size || sfd->size_subflow_data > len) return -EINVAL; =20 @@ -1001,6 +1008,19 @@ static int mptcp_get_subflow_data(struct mptcp_subfl= ow_data *sfd, return len - sfd->size_subflow_data; } =20 +static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, + char __user *optval, + int __user *optlen) +{ + /* if mptcp_subflow_data size is changed, need to adjust + * this function to deal with programs using old version. 
+ */ + BUILD_BUG_ON(sizeof(*sfd) !=3D MIN_INFO_OPTLEN_SIZE); + + return __mptcp_get_subflow_data(sfd, MIN_INFO_OPTLEN_SIZE, + optval, optlen); +} + static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *o= ptval, int __user *optlen) { @@ -1146,6 +1166,117 @@ static int mptcp_getsockopt_subflow_addrs(struct mp= tcp_sock *msk, char __user *o return 0; } =20 +static int mptcp_get_full_subflow_info(struct mptcp_subflow_full_info *sff= i, + char __user *optval, + int __user *optlen) +{ + struct mptcp_subflow_data *sfd =3D (struct mptcp_subflow_data *)sffi; + int len; + + BUILD_BUG_ON(sizeof(*sffi) !=3D MIN_FULL_INFO_OPTLEN_SIZE); + + len =3D __mptcp_get_subflow_data(sfd, MIN_FULL_INFO_OPTLEN_SIZE, + optval, optlen); + if (len < 0) + return len; + + if (sffi->size_tcpinfo_kernel) + return -EINVAL; + + if (sffi->size_sfinfo_user > INT_MAX) + return -EINVAL; + + return len; +} + +static int mptcp_put_subflow_full_info(struct mptcp_subflow_full_info *sff= i, + char __user *optval, + u32 copied, + int __user *optlen) +{ + struct mptcp_subflow_data *sfd =3D (struct mptcp_subflow_data *)sffi; + + return __mptcp_put_subflow_data(sfd, sizeof(*sffi), optval, copied, optle= n); +} + +static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user = *optval, + int __user *optlen) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk =3D (struct sock *)msk; + unsigned int sfcount =3D 0, copied =3D 0; + struct mptcp_subflow_full_info sffi; + void __user *tcpinfoptr, *sfinfoptr; + int len; + + len =3D mptcp_get_full_subflow_info(&sffi, optval, optlen); + if (len < 0) + return len; + + /* don't bother filling the mptcp info if there is not enough + * user-space-provided storage + */ + if (len > 0) { + struct mptcp_info mptcp_info; + char __user *infoptr; + int mptcp_info_len; + + infoptr =3D optval + sffi.size_subflow_full_info; + memset(&mptcp_info, 0, sizeof(mptcp_info)); + mptcp_info_len =3D min_t(unsigned int, len, sizeof(struct mptcp_info)); + + 
mptcp_diag_fill_info(msk, &mptcp_info); + + if (copy_to_user(infoptr, &mptcp_info, mptcp_info_len)) + return -EFAULT; + + copied +=3D mptcp_info_len; + } + + sffi.size_tcpinfo_kernel =3D sizeof(struct tcp_info); + sffi.size_tcpinfo_user =3D min_t(unsigned int, sffi.size_tcpinfo_user, + sizeof(struct tcp_info)); + sfinfoptr =3D (void __force __user *)sffi.subflow_info_addr; + sffi.size_sfinfo_kernel =3D sizeof(struct mptcp_subflow_info); + sffi.size_sfinfo_user =3D min_t(unsigned int, sffi.size_sfinfo_user, + sizeof(struct mptcp_subflow_info)); + tcpinfoptr =3D (void __force __user *)sffi.tcp_info_addr; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk =3D mptcp_subflow_tcp_sock(subflow); + + if (sfcount++ < sffi.num_subflows_user) { + struct mptcp_subflow_info sfinfo; + struct tcp_info tcp_info; + + memset(&sfinfo, 0, sizeof(sfinfo)); + sfinfo.id =3D subflow->subflow_id; + mptcp_get_sub_addrs(ssk, &sfinfo.addrs); + if (copy_to_user(sfinfoptr, &sfinfo, sffi.size_sfinfo_user)) + goto fail_release; + + tcp_get_info(ssk, &tcp_info); + if (copy_to_user(tcpinfoptr, &tcp_info, sffi.size_tcpinfo_user)) + goto fail_release; + + tcpinfoptr +=3D sffi.size_tcpinfo_user; + sfinfoptr +=3D sffi.size_sfinfo_user; + } + } + release_sock(sk); + + sffi.num_subflows_kern =3D sfcount; + if (mptcp_put_subflow_full_info(&sffi, optval, copied, optlen)) + return -EFAULT; + + return 0; + +fail_release: + release_sock(sk); + return -EFAULT; +} + static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optva= l, int __user *optlen, int val) { @@ -1219,6 +1350,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_so= ck *msk, int optname, switch (optname) { case MPTCP_INFO: return mptcp_getsockopt_info(msk, optval, optlen); + case MPTCP_FULL_INFO: + return mptcp_getsockopt_full_info(msk, optval, optlen); case MPTCP_TCPINFO: return mptcp_getsockopt_tcpinfo(msk, optval, optlen); case MPTCP_SUBFLOW_ADDRS: --=20 2.40.1 From nobody Mon Dec 15 11:54:17 2025 
Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4152C182D1 for ; Wed, 24 May 2023 13:50:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684936222; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=UXg+vaMupeIkklAkNYBEKxYeIjDDhEdSH33BXUqlVTo=; b=cvWJTuVdMswGboU5UdfUW6U8Tr8n0AsDfhQpK+HUZzdi95e2KkMH2t9fB28Eb9FDeQghXD cd7uqIeAd60Alq29q8ED2mQIrMwy/oq4gmYQEcAU9CZmNDn1XU3ETcay7bMnD1PSVd6C4o 0vaUC0+r7s7HrLB+l7aYI1GiXuAXa0Y= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-262-GzslMqhxOH-mPkcqNz7SUA-1; Wed, 24 May 2023 09:50:20 -0400 X-MC-Unique: GzslMqhxOH-mPkcqNz7SUA-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 9116D8007D9; Wed, 24 May 2023 13:50:20 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.138]) by smtp.corp.redhat.com (Postfix) with ESMTP id EFD537AF5; Wed, 24 May 2023 13:50:19 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Florian Westphal Subject: [PATCH v4 mptcp-next 3/6] mptcp: move snd_una update earlier for fallback socket. 
Date: Wed, 24 May 2023 15:50:08 +0200 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" That will avoid an unneeded conditional in both the fast-path and in the fallback case and will simplify a bit the next patch. Signed-off-by: Paolo Abeni --- net/mptcp/options.c | 6 ++++++ net/mptcp/protocol.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8a8083207be4..4bdcd2b326bd 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1119,6 +1119,12 @@ bool mptcp_incoming_options(struct sock *sk, struct = sk_buff *skb) mptcp_data_lock(subflow->conn); if (sk_stream_memory_free(sk)) __mptcp_check_push(subflow->conn, sk); + + /* on fallback we just need to ignore the msk-level snd_una, as + * this is really plain TCP + */ + msk->snd_una =3D READ_ONCE(msk->snd_nxt); + __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return true; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9998b2dd150e..89fee2ac84e2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1006,12 +1006,6 @@ static void __mptcp_clean_una(struct sock *sk) struct mptcp_data_frag *dtmp, *dfrag; u64 snd_una; =20 - /* on fallback we just need to ignore snd_una, as this is really - * plain TCP - */ - if (__mptcp_check_fallback(msk)) - msk->snd_una =3D READ_ONCE(msk->snd_nxt); - snd_una =3D msk->snd_una; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) --=20 2.40.1 From nobody Mon Dec 15 11:54:17 2025 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with 
cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3E4FC182DE for ; Wed, 24 May 2023 13:50:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684936223; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=rLU/1B6wX8JT3ecBfcWiqjMgKU7SMpwmUQgxr9j/nYE=; b=c0g/PxMQt1FJGekYCxvAYoO1r4loDLcDDroVSg1Jn6dXya8JGMMJpXb8CNzMYRGbUYZmTM kN3UE5DKwdiCkcAC/5buUO8DOkuolGfexvmDGm2BkHV2vlfx6lh7qdTe/77dYANdiutoKQ 3U9jLkfr3TvU+2yFnUGBFr9dDWGToig= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-133-WoqbR1SaNs-5VWUCp623Fw-1; Wed, 24 May 2023 09:50:21 -0400 X-MC-Unique: WoqbR1SaNs-5VWUCp623Fw-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 71665800888; Wed, 24 May 2023 13:50:21 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.138]) by smtp.corp.redhat.com (Postfix) with ESMTP id D52067AF5; Wed, 24 May 2023 13:50:20 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Florian Westphal Subject: [PATCH v4 mptcp-next 4/6] mptcp: track some aggregate data counters. 
Date: Wed, 24 May 2023 15:50:09 +0200 Message-Id: In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" Currently there are no data transfer counters accounting for all the subflows used by a given MPTCP socket. The user-space can compute such figures aggregating the subflow info, but that is inaccurate if any subflow is closed before the MPTCP socket itself. Add the new counters in the MPTCP socket itself and expose them via the existing diag and sockopt. While touching mptcp_diag_fill_info(), acquire the relevant locks before fetching the msk data, to ensure better data consistency Signed-off-by: Paolo Abeni --- include/uapi/linux/mptcp.h | 5 +++++ net/mptcp/options.c | 10 ++++++++-- net/mptcp/protocol.c | 12 +++++++++++- net/mptcp/protocol.h | 4 ++++ net/mptcp/sockopt.c | 22 +++++++++++++++++----- 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 37c46cf05795..9445c5c06053 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -124,6 +124,11 @@ struct mptcp_info { __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; }; =20 /* diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 4bdcd2b326bd..c254accb14de 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1026,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) return cur_seq; } =20 +static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) +{ + msk->bytes_acked +=3D new_snd_una - msk->snd_una; + msk->snd_una =3D 
new_snd_una; +} + static void ack_update_msk(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_options_received *mp_opt) @@ -1057,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk, __mptcp_check_push(sk, ssk); =20 if (after64(new_snd_una, old_snd_una)) { - msk->snd_una =3D new_snd_una; + __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } mptcp_data_unlock(sk); @@ -1123,7 +1129,7 @@ bool mptcp_incoming_options(struct sock *sk, struct s= k_buff *skb) /* on fallback we just need to ignore the msk-level snd_una, as * this is really plain TCP */ - msk->snd_una =3D READ_ONCE(msk->snd_nxt); + __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt)); =20 __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 89fee2ac84e2..adf26b991c1e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -378,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, st= ruct sock *ssk, =20 if (MPTCP_SKB_CB(skb)->map_seq =3D=3D msk->ack_seq) { /* in sequence */ + msk->bytes_received +=3D copy_len; WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); tail =3D skb_peek_tail(&sk->sk_receive_queue); if (tail && mptcp_try_coalesce(sk, tail, skb)) @@ -761,6 +762,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) MPTCP_SKB_CB(skb)->map_seq +=3D delta; __skb_queue_tail(&sk->sk_receive_queue, skb); } + msk->bytes_received +=3D end_seq - msk->ack_seq; msk->ack_seq =3D end_seq; moved =3D true; } @@ -1513,8 +1515,10 @@ static void mptcp_update_post_push(struct mptcp_sock= *msk, * that has been handed to the subflow for transmission * and skip update in case it was old dfrag. 
*/ - if (likely(after64(snd_nxt_new, msk->snd_nxt))) + if (likely(after64(snd_nxt_new, msk->snd_nxt))) { + msk->bytes_sent +=3D snd_nxt_new - msk->snd_nxt; msk->snd_nxt =3D snd_nxt_new; + } } =20 void mptcp_check_and_set_pending(struct sock *sk) @@ -2639,6 +2643,8 @@ static void __mptcp_retrans(struct sock *sk) msk->last_snd =3D ssk; } } + + msk->bytes_retrans +=3D len; dfrag->already_sent =3D max(dfrag->already_sent, len); =20 reset_timer: @@ -3153,6 +3159,10 @@ static int mptcp_disconnect(struct sock *sk, int fla= gs) WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); + msk->bytes_acked =3D 0; + msk->bytes_received =3D 0; + msk->bytes_sent =3D 0; + msk->bytes_retrans =3D 0; =20 WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f9180ecce5e4..0283383a09f4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -261,10 +261,13 @@ struct mptcp_sock { u64 local_key; u64 remote_key; u64 write_seq; + u64 bytes_sent; u64 snd_nxt; + u64 bytes_received; u64 ack_seq; atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; + u64 bytes_retrans; int rmem_fwd_alloc; struct sock *last_snd; int snd_burst; @@ -273,6 +276,7 @@ struct mptcp_sock { * recovery related fields are under data_lock * protection */ + u64 bytes_acked; u64 snd_una; u64 wnd_end; unsigned long timer_ival; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 59a174ee1d54..a22e2463251b 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -889,7 +889,9 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_= sock *msk, int level, int =20 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { + struct sock *sk =3D (struct sock *)msk; u32 flags =3D 0; + bool slow; =20 memset(info, 0, sizeof(*info)); =20 @@ -915,11 +917,21 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, str= uct mptcp_info *info) if (READ_ONCE(msk->can_ack)) flags |=3D 
MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; info->mptcpi_flags =3D flags; - info->mptcpi_token =3D READ_ONCE(msk->token); - info->mptcpi_write_seq =3D READ_ONCE(msk->write_seq); - info->mptcpi_snd_una =3D READ_ONCE(msk->snd_una); - info->mptcpi_rcv_nxt =3D READ_ONCE(msk->ack_seq); - info->mptcpi_csum_enabled =3D READ_ONCE(msk->csum_enabled); + mptcp_data_lock(sk); + info->mptcpi_snd_una =3D msk->snd_una; + info->mptcpi_rcv_nxt =3D msk->ack_seq; + info->mptcpi_bytes_acked =3D msk->bytes_acked; + mptcp_data_unlock(sk); + + slow =3D lock_sock_fast(sk); + info->mptcpi_csum_enabled =3D msk->csum_enabled; + info->mptcpi_token =3D msk->token; + info->mptcpi_write_seq =3D msk->write_seq; + info->mptcpi_retransmits =3D inet_csk(sk)->icsk_retransmits; + info->mptcpi_bytes_sent =3D msk->bytes_sent; + info->mptcpi_bytes_received =3D msk->bytes_received; + info->mptcpi_bytes_retrans =3D msk->bytes_retrans; + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); =20 --=20 2.40.1 From nobody Mon Dec 15 11:54:17 2025 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 19C321FDD for ; Wed, 24 May 2023 13:50:27 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684936226; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=LWH2EfHSYdsRTUncJhIKmnCKyeCK8zpc1t1M/miAwfM=; b=GoO/dH5wU3rk3GnvsZow5njxeDZ4pbnAomRTbHZFrIgoImea5h8ootCZ7ez6X/5Jitxm7/ OPo1vpMY/kwSnlMNUo8Uuu6pIjzJMDqos0KC/H6RnvaOH0mMqP2nsN17r5SCQqcIY9g8lz JzXM8yFTcJPjnJh8oUniXEVljo7Dcbc= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by 
relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-567-dTOzuKV4O9SX3Z3cVaF_eQ-1; Wed, 24 May 2023 09:50:22 -0400 X-MC-Unique: dTOzuKV4O9SX3Z3cVaF_eQ-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 6BD97800888; Wed, 24 May 2023 13:50:22 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.138]) by smtp.corp.redhat.com (Postfix) with ESMTP id B5F5D7AF5; Wed, 24 May 2023 13:50:21 +0000 (UTC) From: Paolo Abeni To: mptcp@lists.linux.dev Cc: Florian Westphal Subject: [PATCH v4 mptcp-next 5/6] selftests: mptcp: explicitly tests aggregate counters Date: Wed, 24 May 2023 15:50:10 +0200 Message-Id: <788d4696e4a9a5a3bfccde2abcfb70c8aa3997ce.1684935727.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" Update the existing sockopt test-case to do some basic checks on the newly added counters. 
Signed-off-by: Paolo Abeni --- v2 -> v3: - be more kind with older kernel (Matttbe) --- .../selftests/net/mptcp/mptcp_sockopt.c | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/test= ing/selftests/net/mptcp/mptcp_sockopt.c index ae61f39556ca..ff8fcdfccf76 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -51,6 +51,11 @@ struct mptcp_info { __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; }; =20 struct mptcp_subflow_data { @@ -83,8 +88,10 @@ struct mptcp_subflow_addrs { =20 struct so_state { struct mptcp_info mi; + struct mptcp_info last_sample; uint64_t mptcpi_rcv_delta; uint64_t tcpi_rcv_delta; + bool pkt_stats_avail; }; =20 static void die_perror(const char *msg) @@ -318,8 +325,9 @@ static void do_getsockopt_mptcp_info(struct so_state *s= , int fd, size_t w) if (ret < 0) die_perror("getsockopt MPTCP_INFO"); =20 - assert(olen =3D=3D sizeof(i)); + s->pkt_stats_avail =3D olen >=3D sizeof(i); =20 + s->last_sample =3D i; if (s->mi.mptcpi_write_seq =3D=3D 0) s->mi =3D i; =20 @@ -556,6 +564,23 @@ static void process_one_client(int fd, int pipefd) do_getsockopts(&s, fd, ret, ret2); if (s.mptcpi_rcv_delta !=3D (uint64_t)ret + 1) xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_del= ta, ret + 1, s.mptcpi_rcv_delta - ret); + + /* be nice when running on top of older kernel */ + if (s.pkt_stats_avail) { + if (s.last_sample.mptcpi_bytes_sent !=3D ret2) + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_sent, ret2, + s.last_sample.mptcpi_bytes_sent - ret2); + if (s.last_sample.mptcpi_bytes_received !=3D ret) + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, + 
s.last_sample.mptcpi_bytes_received, ret, + s.last_sample.mptcpi_bytes_received - ret); + if (s.last_sample.mptcpi_bytes_acked !=3D ret) + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_acked, ret2, + s.last_sample.mptcpi_bytes_acked - ret2); + } + close(fd); } =20 --=20 2.40.1 From nobody Mon Dec 15 11:54:17 2025 Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3CC87182D1 for ; Wed, 24 May 2023 13:50:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1684936224; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=geAGlBs5dryxsl8aECEqdTyFYbGfZr9VS27CyuUAgKU=; b=Y7WfuBN+fbbtcnLaY0/rkVDyiY6yBJTMI2Oo6ry1W/W7bwtc4O83ScLTh8aMpOnRzwJEP/ sS+ePukUm/iVI7NycqKQ4srAhuaW6ckQDaa6CdZpEvepzVIwgPhPNZbHg0YbH2tQbVFsrb eYFwIEQJIvtRDKAxIcNkOqTIDiegTQY= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-447-aHi1lq5sOR2UhAXqOquy5Q-1; Wed, 24 May 2023 09:50:23 -0400 X-MC-Unique: aHi1lq5sOR2UhAXqOquy5Q-1 Received: from smtp.corp.redhat.com (int-mx05.intmail.prod.int.rdu2.redhat.com [10.11.54.5]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 502CA8032FE; Wed, 24 May 2023 13:50:23 +0000 (UTC) Received: from gerbillo.redhat.com (unknown [10.39.193.138]) by smtp.corp.redhat.com (Postfix) with ESMTP id AF1717AF5; Wed, 24 May 2023 13:50:22 +0000 (UTC) From: Paolo 
Abeni To: mptcp@lists.linux.dev Cc: Florian Westphal Subject: [PATCH v4 mptcp-next 6/6] selftests: mptcp: add MPTCP_FULL_INFO testcase Date: Wed, 24 May 2023 15:50:11 +0200 Message-Id: <930270fbc3358d0c423357f770b679745f8dc4a8.1684935727.git.pabeni@redhat.com> In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.5 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8"; x-default="true" Add a testcase explicitly triggering the newly introduced MPTCP_FULL_INFO getsockopt. Signed-off-by: Paolo Abeni --- .../selftests/net/mptcp/mptcp_sockopt.c | 96 ++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/test= ing/selftests/net/mptcp/mptcp_sockopt.c index ff8fcdfccf76..e74790c10558 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -86,9 +86,38 @@ struct mptcp_subflow_addrs { #define MPTCP_SUBFLOW_ADDRS 3 #endif =20 +#ifndef MPTCP_FULL_INFO +struct mptcp_subflow_info { + __u32 id; + struct mptcp_subflow_addrs addrs; +}; + +struct mptcp_subflow_full_info { + __u32 size_subflow_full_info; /* size of this structure in userspace */ + __u32 num_subflows_kern; /* must be 0, set by kernel (real subflow count= ) */ + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ + __u32 size_tcpinfo_user; + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ + __u32 size_sfinfo_user; + __u32 num_subflows_user; /* max subflows that userspace is interested in; + * the buffers at subflow_info_addr/tcp_info_addr + * are respectively at least: + * num_subflows_user * size_sfinfo_user + * num_subflows_user * size_tcpinfo_user + * bytes wide + */ + __aligned_u64 subflow_info_addr; + __aligned_u64 tcp_info_addr; +}; + 
+#define MPTCP_FULL_INFO 4 +#endif + struct so_state { struct mptcp_info mi; struct mptcp_info last_sample; + struct tcp_info tcp_info; + struct mptcp_subflow_addrs addrs; uint64_t mptcpi_rcv_delta; uint64_t tcpi_rcv_delta; bool pkt_stats_avail; @@ -365,6 +394,8 @@ static void do_getsockopt_tcp_info(struct so_state *s, = int fd, size_t r, size_t olen -=3D sizeof(struct mptcp_subflow_data); assert(olen =3D=3D sizeof(struct tcp_info)); =20 + s->tcp_info =3D ti.ti[0]; + if (ti.ti[0].tcpi_bytes_sent =3D=3D w && ti.ti[0].tcpi_bytes_received =3D=3D r) goto done; @@ -386,7 +417,7 @@ static void do_getsockopt_tcp_info(struct so_state *s, = int fd, size_t r, size_t do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO); } =20 -static void do_getsockopt_subflow_addrs(int fd) +static void do_getsockopt_subflow_addrs(struct so_state *s, int fd) { struct sockaddr_storage remote, local; socklen_t olen, rlen, llen; @@ -433,6 +464,7 @@ static void do_getsockopt_subflow_addrs(int fd) =20 assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) =3D=3D 0); assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) =3D=3D 0= ); + s->addrs =3D addrs.addr[0]; =20 memset(&addrs, 0, sizeof(addrs)); =20 @@ -453,13 +485,73 @@ static void do_getsockopt_subflow_addrs(int fd) do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS); } =20 +struct my_mptcp_full_info { + struct mptcp_subflow_full_info i; + struct mptcp_info mi; +}; +static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd) +{ + size_t data_size =3D sizeof(struct my_mptcp_full_info); + struct mptcp_subflow_info sfinfo[2]; + struct my_mptcp_full_info mmfi; + struct tcp_info tcp_info[2]; + socklen_t olen; + int ret; + + memset(&mmfi, 0, data_size); + memset(tcp_info, 0, sizeof(tcp_info)); + memset(sfinfo, 0, sizeof(sfinfo)); + + mmfi.i.size_subflow_full_info =3D sizeof(struct mptcp_subflow_full_info); + mmfi.i.size_tcpinfo_user =3D sizeof(struct tcp_info); + mmfi.i.size_sfinfo_user =3D sizeof(struct mptcp_subflow_info); 
+ mmfi.i.num_subflows_user =3D 2; + mmfi.i.subflow_info_addr =3D (unsigned long) &sfinfo[0]; + mmfi.i.tcp_info_addr =3D (unsigned long) &tcp_info[0]; + olen =3D data_size; + + ret =3D getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mmfi, &olen); + if (ret < 0) { + if (errno =3D=3D EOPNOTSUPP) { + fprintf(stderr, "\tMPTCP_FULL_INFO test skipped due to lack of kernel s= upport\n"); + return; + } + xerror("getsockopt MPTCP_FULL_INFO"); + } + + assert(olen <=3D data_size); + assert(mmfi.i.size_tcpinfo_user =3D=3D mmfi.i.size_tcpinfo_kernel); + assert(mmfi.i.size_tcpinfo_user =3D=3D sizeof(struct tcp_info)); + assert(mmfi.i.size_sfinfo_user =3D=3D mmfi.i.size_sfinfo_kernel); + assert(mmfi.i.size_sfinfo_user =3D=3D sizeof(struct mptcp_subflow_info)); + assert(mmfi.i.num_subflows_kern =3D=3D 1); + + /* Tolerate future extension to mptcp_info struct and running newer + * test on top of older kernel. + * Anyway any kernel supporting MPTCP_FULL_INFO must at least include + * the following in mptcp_info. + */ + assert(olen > (socklen_t)sizeof(struct mptcp_subflow_full_info)); + assert(mmfi.mi.mptcpi_subflows =3D=3D 0); + assert(mmfi.mi.mptcpi_bytes_sent =3D=3D s->last_sample.mptcpi_bytes_sent); + assert(mmfi.mi.mptcpi_bytes_received =3D=3D s->last_sample.mptcpi_bytes_r= eceived); + + assert(sfinfo[0].id =3D=3D 1); + assert(tcp_info[0].tcpi_bytes_sent =3D=3D s->tcp_info.tcpi_bytes_sent); + assert(tcp_info[0].tcpi_bytes_received =3D=3D s->tcp_info.tcpi_bytes_rece= ived); + assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_add= rs))); +} + static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w) { do_getsockopt_mptcp_info(s, fd, w); =20 do_getsockopt_tcp_info(s, fd, r, w); =20 - do_getsockopt_subflow_addrs(fd); + do_getsockopt_subflow_addrs(s, fd); + + if (r) + do_getsockopt_mptcp_full_info(s, fd); } =20 static void connect_one_server(int fd, int pipefd) --=20 2.40.1