Some user-space applications want to monitor subflow utilization.
Dumping the per-subflow tcp_info is not enough, as the PM could close
and re-create the subflows under the hood, fooling the accounting.
Even checking the src/dst addresses used by each subflow may not
be enough, because a new subflow could re-use the same address/port as
a just-closed one.
This patch introduces a new socket option that allows dumping all the relevant
information all at once (everything, everywhere...), in a consistent manner.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v4 -> v5:
- full_info struct re-design (Florian)
- fix build issue on 32 bit hosts
v3 -> v4:
- full_info struct re-design (Florian)
v2 -> v3:
- added missing changelog (oops)
---
include/uapi/linux/mptcp.h | 24 +++++++
net/mptcp/sockopt.c | 133 ++++++++++++++++++++++++++++++++++++-
2 files changed, 155 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index a124be6ebbba..ee9c49f949a2 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -249,9 +249,33 @@ struct mptcp_subflow_addrs {
};
};
+struct mptcp_subflow_info { /* one entry of the MPTCP_FULL_INFO subflow_info array */
+ __u32 id; /* set by the kernel from the subflow's subflow_id */
+ struct mptcp_subflow_addrs addrs; /* filled via mptcp_get_sub_addrs() only when
+ * the user-provided entry size extends
+ * past the id field; zeroed otherwise
+ */
+};
+
+struct mptcp_full_info { /* argument of the MPTCP_FULL_INFO getsockopt */
+ __u32 size_tcpinfo_kernel; /* must be 0, set by kernel to its
+ * sizeof(struct tcp_info)
+ */
+ __u32 size_tcpinfo_user; /* in: bytes per entry of the tcp_info array;
+ * out: clamped to size_tcpinfo_kernel
+ */
+ __u32 size_sfinfo_kernel; /* must be 0, set by kernel to its
+ * sizeof(struct mptcp_subflow_info)
+ */
+ __u32 size_sfinfo_user; /* in: bytes per entry of the subflow_info array;
+ * out: clamped to size_sfinfo_kernel
+ */
+ __u32 num_subflows; /* must be 0, set by kernel (real subflow count);
+ * counts every subflow, so it can exceed
+ * size_arrays_user
+ */
+ __u32 size_arrays_user; /* max subflows that userspace is interested in;
+ * the buffers at subflow_info/tcp_info
+ * are respectively at least:
+ * size_arrays_user * size_sfinfo_user
+ * size_arrays_user * size_tcpinfo_user
+ * bytes wide
+ */
+ __aligned_u64 subflow_info; /* user pointer to the array receiving one
+ * struct mptcp_subflow_info per subflow
+ */
+ __aligned_u64 tcp_info; /* user pointer to the array receiving one
+ * struct tcp_info per subflow
+ */
+ struct mptcp_info mptcp_info; /* written only when the option length
+ * exceeds the fixed header preceding
+ * this field
+ */
+};
+
/* MPTCP socket options */
#define MPTCP_INFO 1
#define MPTCP_TCPINFO 2
#define MPTCP_SUBFLOW_ADDRS 3
+#define MPTCP_FULL_INFO 4
#endif /* _UAPI_MPTCP_H */
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 770279e0a598..b6f6fa1eb53b 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -14,7 +14,8 @@
#include <net/mptcp.h>
#include "protocol.h"
-#define MIN_INFO_OPTLEN_SIZE 16
+#define MIN_INFO_OPTLEN_SIZE 16
+#define MIN_FULL_INFO_OPTLEN_SIZE 40
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
@@ -977,7 +978,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
}
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
- char __user *optval, int __user *optlen)
+ char __user *optval,
+ int __user *optlen)
{
int len, copylen;
@@ -1158,6 +1160,131 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
return 0;
}
+/* Fetch and validate the fixed-size header of a MPTCP_FULL_INFO request.
+ *
+ * @mfi is zeroed, then its first MIN_FULL_INFO_OPTLEN_SIZE bytes are loaded
+ * from @optval. The fields documented as "set by kernel" must be zero on
+ * input, and the two user-provided entry sizes must not exceed INT_MAX.
+ *
+ * Returns the number of bytes the user buffer offers past the header
+ * (zero or more), -EFAULT if user memory cannot be read, or -EINVAL if the
+ * option length is shorter than the header or the header is malformed.
+ */
+static int mptcp_get_full_info(struct mptcp_full_info *mfi,
+			       char __user *optval,
+			       int __user *optlen)
+{
+	int optsize;
+
+	/* the fixed header must end exactly where mptcp_info begins */
+	BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
+		     MIN_FULL_INFO_OPTLEN_SIZE);
+
+	if (get_user(optsize, optlen))
+		return -EFAULT;
+	if (optsize < MIN_FULL_INFO_OPTLEN_SIZE)
+		return -EINVAL;
+
+	/* start from a clean slate so that the part of *mfi not provided
+	 * by user space is zero
+	 */
+	memset(mfi, 0, sizeof(*mfi));
+	if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
+		return -EFAULT;
+
+	/* output-only fields must come in cleared */
+	if (mfi->num_subflows ||
+	    mfi->size_sfinfo_kernel ||
+	    mfi->size_tcpinfo_kernel)
+		return -EINVAL;
+
+	if (mfi->size_tcpinfo_user > INT_MAX ||
+	    mfi->size_sfinfo_user > INT_MAX)
+		return -EINVAL;
+
+	return optsize - MIN_FULL_INFO_OPTLEN_SIZE;
+}
+
+/* Write the MPTCP_FULL_INFO reply back to user space.
+ *
+ * @copied is the number of bytes to return past the fixed header; the
+ * header size is added here. The resulting length is stored in @optlen
+ * first, then that many bytes of @mfi are copied to @optval.
+ *
+ * Returns 0 on success, -EFAULT if either user-space write fails.
+ */
+static int mptcp_put_full_info(struct mptcp_full_info *mfi,
+			       char __user *optval,
+			       u32 copied,
+			       int __user *optlen)
+{
+	u32 total = copied + MIN_FULL_INFO_OPTLEN_SIZE;
+
+	/* short-circuit keeps the order: length first, payload second */
+	if (put_user(total, optlen) || copy_to_user(optval, mfi, total))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* MPTCP_FULL_INFO getsockopt handler.
+ *
+ * Validates the user header, optionally stages the msk-level mptcp_info,
+ * then walks the subflow list under the msk socket lock, copying one
+ * struct mptcp_subflow_info and one struct tcp_info per subflow into the
+ * user-provided arrays, up to size_arrays_user entries. Subflows beyond
+ * that limit are still counted in num_subflows. Finally the updated header
+ * (and the staged mptcp_info, if requested) is written back.
+ *
+ * Returns 0 on success, -EINVAL for a malformed request, -EFAULT if any
+ * user-space access fails.
+ */
+static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
+				      int __user *optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	unsigned int sfcount = 0, copied = 0;
+	void __user *tcpinfoptr, *sfinfoptr;
+	struct mptcp_full_info mfi;
+	int len;
+
+	len = mptcp_get_full_info(&mfi, optval, optlen);
+	if (len < 0)
+		return len;
+
+	/* don't bother filling the mptcp info if there is not enough
+	 * user-space-provided storage; the info is only staged in mfi here
+	 * and reaches user space together with the header, via
+	 * mptcp_put_full_info(), so no separate user pointer is needed
+	 */
+	if (len > 0) {
+		mptcp_diag_fill_info(msk, &mfi.mptcp_info);
+		copied += min_t(unsigned int, len, sizeof(struct mptcp_info));
+	}
+
+	/* report the kernel's entry sizes and never copy more than the
+	 * kernel structures actually hold
+	 */
+	mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
+	mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
+				      sizeof(struct tcp_info));
+	sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
+	mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
+	mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
+				     sizeof(struct mptcp_subflow_info));
+	tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
+
+	lock_sock(sk);
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		struct mptcp_subflow_info sfinfo;
+		struct tcp_info tcp_info;
+
+		/* keep counting past the user limit, so that num_subflows
+		 * reports the real number of subflows
+		 */
+		if (sfcount++ >= mfi.size_arrays_user)
+			continue;
+
+		/* fetch addr/tcp_info only if the user space buffers
+		 * are wide enough
+		 */
+		memset(&sfinfo, 0, sizeof(sfinfo));
+		sfinfo.id = subflow->subflow_id;
+		if (mfi.size_sfinfo_user >
+		    offsetof(struct mptcp_subflow_info, addrs))
+			mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
+		if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
+			goto fail_release;
+
+		if (mfi.size_tcpinfo_user) {
+			tcp_get_info(ssk, &tcp_info);
+			if (copy_to_user(tcpinfoptr, &tcp_info,
+					 mfi.size_tcpinfo_user))
+				goto fail_release;
+		}
+
+		/* advance by the (clamped) user entry size */
+		tcpinfoptr += mfi.size_tcpinfo_user;
+		sfinfoptr += mfi.size_sfinfo_user;
+	}
+	release_sock(sk);
+
+	mfi.num_subflows = sfcount;
+	if (mptcp_put_full_info(&mfi, optval, copied, optlen))
+		return -EFAULT;
+
+	return 0;
+
+fail_release:
+	release_sock(sk);
+	return -EFAULT;
+}
+
static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
int __user *optlen, int val)
{
@@ -1231,6 +1358,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
switch (optname) {
case MPTCP_INFO:
return mptcp_getsockopt_info(msk, optval, optlen);
+ case MPTCP_FULL_INFO:
+ return mptcp_getsockopt_full_info(msk, optval, optlen);
case MPTCP_TCPINFO:
return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
case MPTCP_SUBFLOW_ADDRS:
--
2.40.1
Hi Paolo,
kernel test robot noticed the following build warnings:
[auto build test WARNING on mptcp/export]
[cannot apply to mptcp/export-net linus/master v6.4-rc3 next-20230524]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Paolo-Abeni/mptcp-move-snd_una-update-earlier-for-fallback-socket/20230525-053110
base: https://github.com/multipath-tcp/mptcp_net-next.git export
patch link: https://lore.kernel.org/r/400326ed4b59e6c9878fc4a406c2bf406876ee72.1684962909.git.pabeni%40redhat.com
patch subject: [PATCH v5 mptcp-next 5/6] mptcp: introduce MPTCP_FULL_INFO getsockopt
config: ia64-allyesconfig (https://download.01.org/0day-ci/archive/20230525/202305250816.Ie1tjg5a-lkp@intel.com/config)
compiler: ia64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
mkdir -p ~/bin
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/8317ccb72cec68bc55db2258c67c166180f07712
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Paolo-Abeni/mptcp-move-snd_una-update-earlier-for-fallback-socket/20230525-053110
git checkout 8317ccb72cec68bc55db2258c67c166180f07712
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 ~/bin/make.cross W=1 O=build_dir ARCH=ia64 olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 ~/bin/make.cross W=1 O=build_dir ARCH=ia64 SHELL=/bin/bash net/mptcp/
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305250816.Ie1tjg5a-lkp@intel.com/
All warnings (new ones prefixed by >>):
net/mptcp/sockopt.c: In function 'mptcp_getsockopt_full_info':
>> net/mptcp/sockopt.c:1226:30: warning: variable 'infoptr' set but not used [-Wunused-but-set-variable]
1226 | char __user *infoptr;
| ^~~~~~~
vim +/infoptr +1226 net/mptcp/sockopt.c
1207
1208 static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
1209 int __user *optlen)
1210 {
1211 struct mptcp_subflow_context *subflow;
1212 struct sock *sk = (struct sock *)msk;
1213 unsigned int sfcount = 0, copied = 0;
1214 void __user *tcpinfoptr, *sfinfoptr;
1215 struct mptcp_full_info mfi;
1216 int len;
1217
1218 len = mptcp_get_full_info(&mfi, optval, optlen);
1219 if (len < 0)
1220 return len;
1221
1222 /* don't bother filling the mptcp info if there is not enough
1223 * user-space-provided storage
1224 */
1225 if (len > 0) {
> 1226 char __user *infoptr;
1227 int mptcp_info_len;
1228
1229 infoptr = optval + MIN_FULL_INFO_OPTLEN_SIZE;
1230 mptcp_info_len = min_t(unsigned int, len, sizeof(struct mptcp_info));
1231 mptcp_diag_fill_info(msk, &mfi.mptcp_info);
1232
1233 copied += mptcp_info_len;
1234 }
1235
1236 mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
1237 mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
1238 sizeof(struct tcp_info));
1239 sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
1240 mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
1241 mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
1242 sizeof(struct mptcp_subflow_info));
1243 tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
1244
1245 lock_sock(sk);
1246 mptcp_for_each_subflow(msk, subflow) {
1247 struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1248 struct mptcp_subflow_info sfinfo;
1249 struct tcp_info tcp_info;
1250
1251 if (sfcount++ >= mfi.size_arrays_user)
1252 continue;
1253
1254 /* fetch addr/tcp_info only if the user space buffers
1255 * are wide enough
1256 */
1257 memset(&sfinfo, 0, sizeof(sfinfo));
1258 sfinfo.id = subflow->subflow_id;
1259 if (mfi.size_sfinfo_user >
1260 offsetof(struct mptcp_subflow_info, addrs))
1261 mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
1262 if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
1263 goto fail_release;
1264
1265 if (mfi.size_tcpinfo_user) {
1266 tcp_get_info(ssk, &tcp_info);
1267 if (copy_to_user(tcpinfoptr, &tcp_info,
1268 mfi.size_tcpinfo_user))
1269 goto fail_release;
1270 }
1271
1272 tcpinfoptr += mfi.size_tcpinfo_user;
1273 sfinfoptr += mfi.size_sfinfo_user;
1274 }
1275 release_sock(sk);
1276
1277 mfi.num_subflows = sfcount;
1278 if (mptcp_put_full_info(&mfi, optval, copied, optlen))
1279 return -EFAULT;
1280
1281 return 0;
1282
1283 fail_release:
1284 release_sock(sk);
1285 return -EFAULT;
1286 }
1287
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.