Some user-space applications want to monitor subflow utilization.
Dumping the per-subflow tcp_info is not enough, as the PM could close
and re-create the subflows under the hood, fooling the accounting.
Even checking the src/dst addresses used by each subflow may not
be enough, because new subflows could reuse the same address/port as
the just closed one.
This patch introduces a new socket option, allowing all the relevant
information to be dumped all-at-once (everything, everywhere...), in a
consistent manner.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v5 -> v6:
- fix compiler warning - unused variable
v4 -> v5:
- full_info struct re-design (Florian)
- fix build issue on 32 bit hosts
v3 -> v4:
- full_info struct re-design (Florian)
v2 -> v3:
- added missing changelog (oops)
---
include/uapi/linux/mptcp.h | 24 +++++++
net/mptcp/sockopt.c | 127 ++++++++++++++++++++++++++++++++++++-
2 files changed, 149 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index a124be6ebbba..ee9c49f949a2 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -249,9 +249,33 @@ struct mptcp_subflow_addrs {
};
};
+struct mptcp_subflow_info {
+ __u32 id;
+ struct mptcp_subflow_addrs addrs;
+};
+
+struct mptcp_full_info {
+ __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
+ __u32 size_tcpinfo_user;
+ __u32 size_sfinfo_kernel; /* must be 0, set by kernel */
+ __u32 size_sfinfo_user;
+ __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
+ __u32 size_arrays_user; /* max subflows that userspace is interested in;
+ * the buffers at subflow_info/tcp_info
+ * are respectively at least:
+ * size_arrays_user * size_sfinfo_user
+ * size_arrays_user * size_tcpinfo_user
+ * bytes wide
+ */
+ __aligned_u64 subflow_info;
+ __aligned_u64 tcp_info;
+ struct mptcp_info mptcp_info;
+};
+
/* MPTCP socket options */
#define MPTCP_INFO 1
#define MPTCP_TCPINFO 2
#define MPTCP_SUBFLOW_ADDRS 3
+#define MPTCP_FULL_INFO 4
#endif /* _UAPI_MPTCP_H */
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 770279e0a598..bfbe0378e997 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -14,7 +14,8 @@
#include <net/mptcp.h>
#include "protocol.h"
-#define MIN_INFO_OPTLEN_SIZE 16
+#define MIN_INFO_OPTLEN_SIZE 16
+#define MIN_FULL_INFO_OPTLEN_SIZE 40
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
@@ -977,7 +978,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
}
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
- char __user *optval, int __user *optlen)
+ char __user *optval,
+ int __user *optlen)
{
int len, copylen;
@@ -1158,6 +1160,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
return 0;
}
+static int mptcp_get_full_info(struct mptcp_full_info *mfi,
+ char __user *optval,
+ int __user *optlen)
+{
+ int len;
+
+ BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
+ MIN_FULL_INFO_OPTLEN_SIZE);
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ if (len < MIN_FULL_INFO_OPTLEN_SIZE)
+ return -EINVAL;
+
+ memset(mfi, 0, sizeof(*mfi));
+ if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
+ return -EFAULT;
+
+ if (mfi->size_tcpinfo_kernel ||
+ mfi->size_sfinfo_kernel ||
+ mfi->num_subflows)
+ return -EINVAL;
+
+ if (mfi->size_sfinfo_user > INT_MAX ||
+ mfi->size_tcpinfo_user > INT_MAX)
+ return -EINVAL;
+
+ return len - MIN_FULL_INFO_OPTLEN_SIZE;
+}
+
+static int mptcp_put_full_info(struct mptcp_full_info *mfi,
+ char __user *optval,
+ u32 copylen,
+ int __user *optlen)
+{
+ copylen += MIN_FULL_INFO_OPTLEN_SIZE;
+ if (put_user(copylen, optlen))
+ return -EFAULT;
+
+ if (copy_to_user(optval, mfi, copylen))
+ return -EFAULT;
+ return 0;
+}
+
+static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
+ int __user *optlen)
+{
+ unsigned int sfcount = 0, copylen = 0;
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ void __user *tcpinfoptr, *sfinfoptr;
+ struct mptcp_full_info mfi;
+ int len;
+
+ len = mptcp_get_full_info(&mfi, optval, optlen);
+ if (len < 0)
+ return len;
+
+ /* don't bother filling the mptcp info if there is not enough
+ * user-space-provided storage
+ */
+ if (len > 0) {
+ mptcp_diag_fill_info(msk, &mfi.mptcp_info);
+ copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
+ }
+
+ mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
+ mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
+ sizeof(struct tcp_info));
+ sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
+ mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
+ mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
+ sizeof(struct mptcp_subflow_info));
+ tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
+
+ lock_sock(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ struct mptcp_subflow_info sfinfo;
+ struct tcp_info tcp_info;
+
+ if (sfcount++ >= mfi.size_arrays_user)
+ continue;
+
+ /* fetch addr/tcp_info only if the user space buffers
+ * are wide enough
+ */
+ memset(&sfinfo, 0, sizeof(sfinfo));
+ sfinfo.id = subflow->subflow_id;
+ if (mfi.size_sfinfo_user >
+ offsetof(struct mptcp_subflow_info, addrs))
+ mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
+ if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
+ goto fail_release;
+
+ if (mfi.size_tcpinfo_user) {
+ tcp_get_info(ssk, &tcp_info);
+ if (copy_to_user(tcpinfoptr, &tcp_info,
+ mfi.size_tcpinfo_user))
+ goto fail_release;
+ }
+
+ tcpinfoptr += mfi.size_tcpinfo_user;
+ sfinfoptr += mfi.size_sfinfo_user;
+ }
+ release_sock(sk);
+
+ mfi.num_subflows = sfcount;
+ if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
+ return -EFAULT;
+
+ return 0;
+
+fail_release:
+ release_sock(sk);
+ return -EFAULT;
+}
+
static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
int __user *optlen, int val)
{
@@ -1231,6 +1352,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
switch (optname) {
case MPTCP_INFO:
return mptcp_getsockopt_info(msk, optval, optlen);
+ case MPTCP_FULL_INFO:
+ return mptcp_getsockopt_full_info(msk, optval, optlen);
case MPTCP_TCPINFO:
return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
case MPTCP_SUBFLOW_ADDRS:
--
2.40.1
Hi Paolo,
On 25/05/2023 09:17, Paolo Abeni wrote:
> Some user-space applications want to monitor the subflows utilization.
>
> Dumping the per subflow tcp_info is not enough, as the PM could close
> and re-create the subflows under-the-hood, fooling the accounting.
> Even checking the src/dst addresses used by each subflow could not
> be enough, because new subflows could re-use the same address/port of
> the just closed one.
>
> This patch introduces a new socket option, allow dumping all the relevant
> information all-at-once (everything, everywhere...), in a consistent
> manner.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v5 -> v6:
> - fix compiler warning - unused variable
>
> v4 -> v5:
> - full_info struct re-design (Florian)
> - fix build issue on 32 bit hosts
Thank you for the update! I find the new version clearer, hopefully
easier to maintain and to use from the userspace side.
(...)
> diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
> index 770279e0a598..bfbe0378e997 100644
> --- a/net/mptcp/sockopt.c
> +++ b/net/mptcp/sockopt.c
> @@ -14,7 +14,8 @@
> #include <net/mptcp.h>
> #include "protocol.h"
>
> -#define MIN_INFO_OPTLEN_SIZE 16
> +#define MIN_INFO_OPTLEN_SIZE 16
> +#define MIN_FULL_INFO_OPTLEN_SIZE 40
>
> static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
> {
> @@ -977,7 +978,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
> }
>
> static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
> - char __user *optval, int __user *optlen)
> + char __user *optval,
> + int __user *optlen)
Do you mind if I undo this modification when applying the patch?
Or did you do that on purpose?
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Thu, 2023-05-25 at 17:20 +0200, Matthieu Baerts wrote:
> Hi Paolo,
>
> On 25/05/2023 09:17, Paolo Abeni wrote:
> > Some user-space applications want to monitor the subflows utilization.
> >
> > Dumping the per subflow tcp_info is not enough, as the PM could close
> > and re-create the subflows under-the-hood, fooling the accounting.
> > Even checking the src/dst addresses used by each subflow could not
> > be enough, because new subflows could re-use the same address/port of
> > the just closed one.
> >
> > This patch introduces a new socket option, allow dumping all the relevant
> > information all-at-once (everything, everywhere...), in a consistent
> > manner.
> >
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > v5 -> v6:
> > - fix compiler warning - unused variable
> >
> > v4 -> v5:
> > - full_info struct re-design (Florian)
> > - fix build issue on 32 bit hosts
>
> Thank you for the update! I find the new version clearer, hopefully
> easier to maintain and to use from the userspace side.
>
> (...)
>
> > diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
> > index 770279e0a598..bfbe0378e997 100644
> > --- a/net/mptcp/sockopt.c
> > +++ b/net/mptcp/sockopt.c
> > @@ -14,7 +14,8 @@
> > #include <net/mptcp.h>
> > #include "protocol.h"
> >
> > -#define MIN_INFO_OPTLEN_SIZE 16
> > +#define MIN_INFO_OPTLEN_SIZE 16
> > +#define MIN_FULL_INFO_OPTLEN_SIZE 40
> >
> > static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
> > {
> > @@ -977,7 +978,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
> > }
> >
> > static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
> > - char __user *optval, int __user *optlen)
> > + char __user *optval,
> > + int __user *optlen)
>
> Do you mind if I undo this modification when applying the patch?
> Or did you do that on purpose?
Oops, landed there unintentionally. Thanks for spotting. Please restore
the original code, thanks!
/P
© 2016 - 2026 Red Hat, Inc.