As described in the "How to use MPTCP?" section of the MPTCP wiki [1]:
"Your app can create sockets with IPPROTO_MPTCP as the proto:
( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be
forced to create and use MPTCP sockets instead of TCP ones via the
mptcpize command bundled with the mptcpd daemon."
But the mptcpize (LD_PRELOAD technique) command has some limitations
[2]:
- it doesn't work if the application is not using libc (e.g. GoLang
apps)
- in some envs, it might not be easy to set env vars / change the way
apps are launched, e.g. on Android
- mptcpize needs to be launched with all apps that want MPTCP: we could
have more control from BPF to enable MPTCP only for some apps or all the
ones of a netns or a cgroup, etc.
- it is not in BPF, we cannot talk about it at netdev conf.
So this patchset attempts to use BPF to implement functionality similar
to mptcpize.
The main idea is to add a hook in sys_socket() to change the protocol id
from IPPROTO_TCP (or 0) to IPPROTO_MPTCP.
1. The first solution [3] uses "cgroup/sock_create":
Implement a new helper, bpf_mptcpify(), to change the protocol id:
+BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
+{
+ if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
+ sk->sk_protocol = IPPROTO_MPTCP;
+ return (unsigned long)sk;
+ }
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_mptcpify_proto = {
+ .func = bpf_mptcpify,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+ .arg1_type = ARG_PTR_TO_CTX,
+};
Add a hook in the "cgroup/sock_create" section and invoke the
bpf_mptcpify() helper from this hook:
+SEC("cgroup/sock_create")
+int sock(struct bpf_sock *ctx)
+{
+ struct sock *sk;
+
+ if (ctx->type != SOCK_STREAM)
+ return 1;
+
+ sk = bpf_mptcpify(ctx);
+ if (!sk)
+ return 1;
+
+ protocol = sk->sk_protocol;
+ return 1;
+}
But this solution doesn't work, because the "cgroup/sock_create" hook
runs at the end of inet_create(). By that point it is too late to change
the protocol id.
2. The second solution [4] uses "fentry":
Implement a bpf_mptcpify() helper:
+BPF_CALL_1(bpf_mptcpify, struct socket_args *, args)
+{
+ if (args->family == AF_INET &&
+ args->type == SOCK_STREAM &&
+ (!args->protocol || args->protocol == IPPROTO_TCP))
+ args->protocol = IPPROTO_MPTCP;
+
+ return 0;
+}
+
+BTF_ID_LIST(bpf_mptcpify_btf_ids)
+BTF_ID(struct, socket_args)
+
+static const struct bpf_func_proto bpf_mptcpify_proto = {
+ .func = bpf_mptcpify,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_mptcpify_btf_ids[0],
+};
Add a new wrapper, socket_create(), in the sys_socket() path, and pass a
pointer to a struct socket_args (int family; int type; int protocol) to
the wrapper.
+int socket_create(struct socket_args *args, struct socket **res)
+{
+ return sock_create(args->family, args->type, args->protocol, res);
+}
+EXPORT_SYMBOL(socket_create);
+
/**
* sock_create_kern - creates a socket (kernel space)
* @net: net namespace
@@ -1608,6 +1614,7 @@ EXPORT_SYMBOL(sock_create_kern);
static struct socket *__sys_socket_create(int family, int type, int protocol)
{
+ struct socket_args args = { 0 };
struct socket *sock;
int retval;
@@ -1621,7 +1628,10 @@ static struct socket *__sys_socket_create(int family, int type, int protocol)
return ERR_PTR(-EINVAL);
type &= SOCK_TYPE_MASK;
- retval = sock_create(family, type, protocol, &sock);
+ args.family = family;
+ args.type = type;
+ args.protocol = protocol;
+ retval = socket_create(&args, &sock);
if (retval < 0)
return ERR_PTR(retval);
Add an "fentry" hook on the newly added wrapper, invoking bpf_mptcpify()
in the hook:
+SEC("fentry/socket_create")
+int BPF_PROG(trace_socket_create, void *args,
+ struct socket **res)
+{
+ bpf_mptcpify(args);
+ return 0;
+}
This version works, but it is only a workaround: the code that adds a
wrapper to sys_socket() is not very elegant, and it shouldn't be
accepted upstream.
3. The last solution is this patchset itself:
Introduce a new program type, BPF_PROG_TYPE_CGROUP_SOCKINIT, and a new
attach type, BPF_CGROUP_SOCKINIT, on a per-cgroup basis.
Define the BPF_CGROUP_RUN_PROG_SOCKINIT() macro, and implement the
__cgroup_bpf_run_sockinit() helper to run a sockinit program:
+#define BPF_CGROUP_RUN_PROG_SOCKINIT(family, type, protocol) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled(CGROUP_SOCKINIT)) \
+ __ret = __cgroup_bpf_run_sockinit(family, type, protocol, \
+ CGROUP_SOCKINIT); \
+ __ret; \
+})
+
Invoke BPF_CGROUP_RUN_PROG_SOCKINIT() in __sock_create() to change
the arguments:
@@ -1469,6 +1469,12 @@ int __sock_create(struct net *net, int family, int type, int protocol,
struct socket *sock;
const struct net_proto_family *pf;
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_SOCKINIT(&family, &type, &protocol);
+ if (err)
+ return err;
+ }
+
/*
* Check protocol is in range
*/
Define a BPF program in the newly added "cgroup/sockinit" section, so
that it is run by BPF_CGROUP_RUN_PROG_SOCKINIT() in __sock_create().
@@ -158,6 +158,11 @@ static struct sec_name_test tests[] = {
{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
{0, BPF_CGROUP_SETSOCKOPT},
},
+ {
+ "cgroup/sockinit",
+ {0, BPF_PROG_TYPE_CGROUP_SOCKINIT, BPF_CGROUP_SOCKINIT},
+ {0, BPF_CGROUP_SOCKINIT},
+ },
};
+SEC("cgroup/sockinit")
+int mptcpify(struct bpf_sockinit_ctx *ctx)
+{
+ if ((ctx->family == AF_INET || ctx->family == AF_INET6) &&
+ ctx->type == SOCK_STREAM &&
+ (!ctx->protocol || ctx->protocol == IPPROTO_TCP)) {
+ ctx->protocol = IPPROTO_MPTCP;
+ }
+
+ return 1;
+}
This version is the best solution we have found so far.
[1]
https://github.com/multipath-tcp/mptcp_net-next/wiki
[2]
https://github.com/multipath-tcp/mptcp_net-next/issues/79
[3]
https://patchwork.kernel.org/project/mptcp/cover/cover.1688215769.git.geliang.tang@suse.com/
[4]
https://patchwork.kernel.org/project/mptcp/cover/cover.1688366249.git.geliang.tang@suse.com/
v2:
- Fix selftests build errors reported by CI:
/tmp/work/bpf/bpf/tools/testing/selftests/bpf/prog_tests/mptcp.c:218:2: note: ‘snprintf’ output between 102 and 133 bytes into a destination of size 128
218 | snprintf(cmd, sizeof(cmd),
| ^~~~~~~~~~~~~~~~~~~~~~~~~~
219 | "ip netns exec %s nstat -asz MPTcpExtMPCapableSYNACKRX | \
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
220 | awk 'NR==1 {next} {print $2}' | \
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
221 | grep -q 1", NS_TEST);
| ~~~~~~~~~~~~~~~~~~~~
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79
Geliang Tang (8):
bpf: Add new prog type sockinit
bpf: Run a sockinit program
net: socket: run sockinit hooks
libbpf: Support sockinit hook
selftests/bpf: Add mptcpify program
selftests/bpf: use random netns name for mptcp
selftests/bpf: add two mptcp netns helpers
selftests/bpf: Add mptcpify selftest
include/linux/bpf-cgroup-defs.h | 1 +
include/linux/bpf-cgroup.h | 14 ++
include/linux/bpf_types.h | 2 +
include/uapi/linux/bpf.h | 8 ++
kernel/bpf/cgroup.c | 90 +++++++++++++
kernel/bpf/syscall.c | 7 +
kernel/bpf/verifier.c | 1 +
net/socket.c | 6 +
tools/include/uapi/linux/bpf.h | 8 ++
tools/lib/bpf/libbpf.c | 3 +
tools/lib/bpf/libbpf_probes.c | 1 +
.../bpf/cgroup_getset_retval_hooks.h | 1 +
.../testing/selftests/bpf/prog_tests/mptcp.c | 123 ++++++++++++++++--
.../selftests/bpf/prog_tests/section_names.c | 5 +
tools/testing/selftests/bpf/progs/mptcpify.c | 26 ++++
15 files changed, 287 insertions(+), 9 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c
--
2.35.3