1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
Depends on:
3
Some path manager related refactoring and cleanups.
4
- patch 1, a cleanup.
5
- patches 2-4, drop mptcp_pm_is_userspace() and mptcp_pm_is_kernel().
6
- patches 5-6, change remote of set_flags as mptcp_pm_addr_entry type.
4
7
5
- BPF path manager, part 4, v4
8
Geliang Tang (6):
6
Based-on: <cover.1738919954.git.tanggeliang@kylinos.cn>
9
mptcp: pm: use pm variable instead of msk->pm
10
mptcp: pm: userspace: drop is_userspace in free_local_addr_list
11
mptcp: pm: drop is_kernel in alloc_anno_list
12
mptcp: pm: in-kernel: drop is_userspace in remove_id_zero
13
mptcp: pm: add remote parameter for set_flags
14
mptcp: pm: in-kernel: drop changed parameter of set_flags
7
15
8
- add mptcp_address bpf_iter, v5
16
net/mptcp/pm.c | 28 ++++++++----
9
Based-on: <cover.1738924354.git.tanggeliang@kylinos.cn>
17
net/mptcp/pm_netlink.c | 93 +++++++++++++++++++++-------------------
10
18
net/mptcp/pm_userspace.c | 25 +++--------
11
Geliang Tang (5):
19
net/mptcp/protocol.h | 5 ++-
12
bpf: Add mptcp path manager struct_ops
20
4 files changed, 77 insertions(+), 74 deletions(-)
13
bpf: Register mptcp struct_ops kfunc set
14
selftests/bpf: Add mptcp userspace pm subtest
15
selftests/bpf: Implement mptcp pm helpers in BPF
16
selftests/bpf: Add mptcp bpf path manager subtest
17
18
net/mptcp/bpf.c | 372 +++++++++++++++++-
19
.../testing/selftests/bpf/prog_tests/mptcp.c | 211 ++++++++++
20
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 185 +++++++++
21
.../bpf/progs/mptcp_bpf_userspace_pm.c | 275 +++++++++++++
22
4 files changed, 1042 insertions(+), 1 deletion(-)
23
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
24
21
25
--
22
--
26
2.43.0
23
2.43.0
diff view generated by jsdifflib
New patch
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
2
3
The variable "pm" has been defined in mptcp_pm_fully_established()
4
and mptcp_pm_data_reset() as "sk->pm", so use "pm" directly instead
5
of using "sk->pm".
6
7
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
8
---
9
net/mptcp/pm.c | 8 ++++----
10
1 file changed, 4 insertions(+), 4 deletions(-)
11
12
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/net/mptcp/pm.c
15
+++ b/net/mptcp/pm.c
16
@@ -XXX,XX +XXX,XX @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
17
     * be sure to serve this event only once.
18
     */
19
    if (READ_ONCE(pm->work_pending) &&
20
-     !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
21
+     !(pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
22
        mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
23
24
-    if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
25
+    if ((pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
26
        announce = true;
27
28
-    msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
29
+    pm->status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
30
    spin_unlock_bh(&pm->lock);
31
32
    if (announce)
33
@@ -XXX,XX +XXX,XX @@ void mptcp_pm_data_reset(struct mptcp_sock *msk)
34
    WRITE_ONCE(pm->addr_signal, 0);
35
    WRITE_ONCE(pm->remote_deny_join_id0, false);
36
    pm->status = 0;
37
-    bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
38
+    bitmap_fill(pm->id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
39
}
40
41
void mptcp_pm_data_init(struct mptcp_sock *msk)
42
--
43
2.43.0
diff view generated by jsdifflib
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
This patch implements MPTCP path manager helpers mptcp_pm_copy_addr(),
3
To reduce the path manager's reliance on mptcp_pm_is_userspace()
4
mptcp_pm_copy_entry(), ipv6_addr_equal(), mptcp_addresses_equal() and
4
and mptcp_pm_is_kernel() helpers, this patch drops the check for
5
mptcp_pm_find_ssk() in BPF.
5
mptcp_pm_is_userspace() in mptcp_free_local_addr_list() and
6
replaces it with a check to see if userspace_pm_local_addr_list
7
is empty.
6
8
7
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
9
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
8
---
10
---
9
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 110 ++++++++++++++++++
11
net/mptcp/pm_userspace.c | 2 +-
10
1 file changed, 110 insertions(+)
12
1 file changed, 1 insertion(+), 1 deletion(-)
11
13
12
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
14
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
16
--- a/net/mptcp/pm_userspace.c
15
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
17
+++ b/net/mptcp/pm_userspace.c
16
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
17
#define __MPTCP_BPF_H__
19
    struct sock *sk = (struct sock *)msk;
18
20
    LIST_HEAD(free_list);
19
#include "bpf_experimental.h"
21
20
+#include "bpf_tracing_net.h"
22
-    if (!mptcp_pm_is_userspace(msk))
21
23
+    if (list_empty(&msk->pm.userspace_pm_local_addr_list))
22
/* mptcp helpers from include/net/mptcp.h */
24
        return;
23
#define MPTCP_SUBFLOWS_MAX 8
25
24
@@ -XXX,XX +XXX,XX @@ static inline int list_is_head(const struct list_head *list,
26
    spin_lock_bh(&msk->pm.lock);
25
#define mptcp_for_each_subflow(__msk, __subflow)            \
26
    list_for_each_entry(__subflow, &((__msk)->conn_list), node)
27
28
+/* errno macros from include/uapi/asm-generic/errno-base.h */
29
+#define    ESRCH        3    /* No such process */
30
+#define    ENOMEM        12    /* Out of Memory */
31
+#define    EINVAL        22    /* Invalid argument */
32
+
33
static __always_inline struct sock *
34
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
35
{
36
@@ -XXX,XX +XXX,XX @@ extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
37
extern struct mptcp_subflow_context *
38
bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym;
39
40
+/* reimplemented BPF helpers */
41
+static __always_inline void
42
+mptcp_pm_copy_addr(struct mptcp_addr_info *dst,
43
+         struct mptcp_addr_info *src)
44
+{
45
+    dst->id = src->id;
46
+    dst->family = src->family;
47
+    dst->port = src->port;
48
+
49
+    if (src->family == AF_INET) {
50
+        dst->addr.s_addr = src->addr.s_addr;
51
+    } else if (src->family == AF_INET6) {
52
+        dst->addr6.s6_addr32[0] = src->addr6.s6_addr32[0];
53
+        dst->addr6.s6_addr32[1] = src->addr6.s6_addr32[1];
54
+        dst->addr6.s6_addr32[2] = src->addr6.s6_addr32[2];
55
+        dst->addr6.s6_addr32[3] = src->addr6.s6_addr32[3];
56
+    }
57
+}
58
+
59
+static __always_inline void
60
+mptcp_pm_copy_entry(struct mptcp_pm_addr_entry *dst,
61
+         struct mptcp_pm_addr_entry *src)
62
+{
63
+    mptcp_pm_copy_addr(&dst->addr, &src->addr);
64
+
65
+    dst->flags = src->flags;
66
+    dst->ifindex = src->ifindex;
67
+}
68
+
69
+#define inet_sk(ptr) container_of(ptr, struct inet_sock, sk)
70
+
71
+#define ipv6_addr_equal(a, b)    ((a).s6_addr32[0] == (b).s6_addr32[0] &&    \
72
+                 (a).s6_addr32[1] == (b).s6_addr32[1] &&    \
73
+                 (a).s6_addr32[2] == (b).s6_addr32[2] &&    \
74
+                 (a).s6_addr32[3] == (b).s6_addr32[3])
75
+
76
+static __always_inline bool
77
+mptcp_addresses_equal(const struct mptcp_addr_info *a,
78
+         const struct mptcp_addr_info *b, bool use_port)
79
+{
80
+    bool addr_equals = false;
81
+
82
+    if (a->family == b->family) {
83
+        if (a->family == AF_INET)
84
+            addr_equals = a->addr.s_addr == b->addr.s_addr;
85
+        else
86
+            addr_equals = ipv6_addr_equal(a->addr6, b->addr6);
87
+    }
88
+
89
+    if (!addr_equals)
90
+        return false;
91
+    if (!use_port)
92
+        return true;
93
+
94
+    return a->port == b->port;
95
+}
96
+
97
+static __always_inline struct sock *
98
+mptcp_pm_find_ssk(struct mptcp_sock *msk,
99
+         const struct mptcp_addr_info *local,
100
+         const struct mptcp_addr_info *remote)
101
+{
102
+    struct mptcp_subflow_context *subflow;
103
+
104
+    if (local->family != remote->family)
105
+        return NULL;
106
+
107
+    bpf_for_each(mptcp_subflow, subflow, msk) {
108
+        const struct inet_sock *issk;
109
+        struct sock *ssk;
110
+
111
+        ssk = bpf_mptcp_subflow_tcp_sock(subflow);
112
+        if (!ssk)
113
+            continue;
114
+
115
+        if (local->family != ssk->sk_family)
116
+            continue;
117
+
118
+        issk = bpf_core_cast(inet_sk(ssk), struct inet_sock);
119
+
120
+        switch (ssk->sk_family) {
121
+        case AF_INET:
122
+            if (issk->inet_saddr != local->addr.s_addr ||
123
+             issk->inet_daddr != remote->addr.s_addr)
124
+                continue;
125
+            break;
126
+        case AF_INET6: {
127
+            if (!ipv6_addr_equal(local->addr6, issk->pinet6->saddr) ||
128
+             !ipv6_addr_equal(remote->addr6, ssk->sk_v6_daddr))
129
+                continue;
130
+            break;
131
+        }
132
+        default:
133
+            continue;
134
+        }
135
+
136
+        if (issk->inet_sport == local->port &&
137
+         issk->inet_dport == remote->port)
138
+            return ssk;
139
+    }
140
+
141
+    return NULL;
142
+}
143
+
144
#endif
145
--
27
--
146
2.43.0
28
2.43.0
diff view generated by jsdifflib
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
To verify that the behavior of BPF path manager is the same as that of
3
To reduce the path manager's reliance on mptcp_pm_is_userspace()
4
userspace pm in the kernel, a userspace pm self-test has been added.
4
and mptcp_pm_is_kernel() helpers, this patch drops the check for
5
BPF path manager in the next commit will also use this test.
5
mptcp_pm_is_kernel() in the function mptcp_pm_alloc_anno_list().
6
Instead, add a new parameter "reissue" for this function, pass
7
"false" to this function in the in-kernel PM while pass "true"
8
to it in the userspace PM.
6
9
7
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
10
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
8
---
11
---
9
.../testing/selftests/bpf/prog_tests/mptcp.c | 160 ++++++++++++++++++
12
net/mptcp/pm_netlink.c | 7 ++++---
10
1 file changed, 160 insertions(+)
13
net/mptcp/pm_userspace.c | 2 +-
14
net/mptcp/protocol.h | 3 ++-
15
3 files changed, 7 insertions(+), 5 deletions(-)
11
16
12
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
17
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
19
--- a/net/mptcp/pm_netlink.c
15
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
20
+++ b/net/mptcp/pm_netlink.c
16
@@ -XXX,XX +XXX,XX @@ enum mptcp_pm_type {
21
@@ -XXX,XX +XXX,XX @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
17
    __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
18
};
19
20
+enum mptcp_pm_family {
21
+    IPV4 = 0,
22
+    IPV4MAPPED,
23
+    IPV6,
24
+};
25
+
26
static const unsigned int total_bytes = 10 * 1024 * 1024;
27
static int duration;
28
29
@@ -XXX,XX +XXX,XX @@ static void test_iters_address(void)
30
    close(cgroup_fd);
31
}
22
}
32
23
33
+static int userspace_pm_add_addr(__u32 token, char *addr, __u8 id)
24
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
34
+{
25
-             const struct mptcp_addr_info *addr)
35
+    return SYS_NOFAIL("ip netns exec %s %s ann %s id %u token %u",
26
+             const struct mptcp_addr_info *addr,
36
+             NS_TEST, PM_CTL, addr, id, token);
27
+             bool reissue)
37
+}
38
+
39
+static int userspace_pm_rm_addr(__u32 token, __u8 id)
40
+{
41
+    return SYS_NOFAIL("ip netns exec %s %s rem id %u token %u",
42
+             NS_TEST, PM_CTL, id, token);
43
+}
44
+
45
+static int userspace_pm_rm_subflow(__u32 token, char *addr, __u8 id)
46
+{
47
+    bool ipv6 = strstr(addr, ":");
48
+    char line[1024], *str;
49
+    __u32 sport, dport;
50
+
51
+    if (userspace_pm_get_events_line("type:10", line))
52
+        return -1;
53
+
54
+    str = strstr(line, "sport");
55
+    if (!str || sscanf(str, "sport:%u,dport:%u,", &sport, &dport) != 2) {
56
+        log_err("rm_subflow error, str=%s\n", str);
57
+        return -1;
58
+    }
59
+
60
+    str = ipv6 ? (strstr(addr, ".") ? "::ffff:"ADDR_1 : ADDR6_1) : ADDR_1;
61
+    return SYS_NOFAIL("ip netns exec %s %s dsf lip %s lport %u rip %s rport %u token %u",
62
+             NS_TEST, PM_CTL, addr, sport, str, dport, token);
63
+}
64
+
65
+static int userspace_pm_set_flags(__u32 token, char *addr, char *flags)
66
+{
67
+    bool ipv6 = strstr(addr, ":");
68
+    char line[1024], *str;
69
+    __u32 sport, dport;
70
+
71
+    if (userspace_pm_get_events_line("type:10", line))
72
+        return -1;
73
+
74
+    str = strstr(line, "sport");
75
+    if (!str || sscanf(str, "sport:%u,dport:%u,", &sport, &dport) != 2) {
76
+        log_err("set_flags error, str=%s\n", str);
77
+        return -1;
78
+    }
79
+
80
+    str = ipv6 ? (strstr(addr, ".") ? "::ffff:"ADDR_1 : ADDR6_1) : ADDR_1;
81
+    return SYS_NOFAIL("ip netns exec %s %s set %s port %u rip %s rport %u flags %s token %u",
82
+             NS_TEST, PM_CTL, addr, sport, str, dport, flags, token);
83
+}
84
+
85
+static void run_userspace_pm(enum mptcp_pm_family family)
86
+{
87
+    bool ipv4mapped = (family == IPV4MAPPED);
88
+    bool ipv6 = (family == IPV6 || ipv4mapped);
89
+    int server_fd, client_fd, accept_fd;
90
+    __u32 token;
91
+    char *addr;
92
+    int err;
93
+
94
+    addr = ipv6 ? (ipv4mapped ? "::ffff:"ADDR_1 : ADDR6_1) : ADDR_1;
95
+    server_fd = start_mptcp_server(ipv6 ? AF_INET6 : AF_INET, addr, PORT_1, 0);
96
+    if (!ASSERT_OK_FD(server_fd, "start_mptcp_server"))
97
+        return;
98
+
99
+    client_fd = connect_to_fd(server_fd, 0);
100
+    if (!ASSERT_OK_FD(client_fd, "connect_to_fd"))
101
+        goto close_server;
102
+
103
+    accept_fd = accept(server_fd, NULL, NULL);
104
+    if (!ASSERT_OK_FD(accept_fd, "accept"))
105
+        goto close_client;
106
+
107
+    token = userspace_pm_get_token(client_fd);
108
+    if (!token)
109
+        goto close_client;
110
+    recv_byte(accept_fd);
111
+    usleep(200000); /* 0.2s */
112
+
113
+    addr = ipv6 ? (ipv4mapped ? "::ffff:"ADDR_2 : ADDR6_2) : ADDR_2;
114
+    err = userspace_pm_add_subflow(token, addr, 100);
115
+    if (!ASSERT_OK(err, "userspace_pm_add_subflow 100"))
116
+        goto close_accept;
117
+
118
+    send_byte(accept_fd);
119
+    recv_byte(client_fd);
120
+
121
+    err = userspace_pm_set_flags(token, addr, "backup");
122
+    if (!ASSERT_OK(err, "userspace_pm_set_flags backup"))
123
+        goto close_accept;
124
+
125
+    send_byte(client_fd);
126
+    recv_byte(accept_fd);
127
+
128
+    err = userspace_pm_set_flags(token, addr, "nobackup");
129
+    if (!ASSERT_OK(err, "userspace_pm_set_flags nobackup"))
130
+        goto close_accept;
131
+
132
+    send_byte(accept_fd);
133
+    recv_byte(client_fd);
134
+
135
+    err = userspace_pm_rm_subflow(token, addr, 100);
136
+    if (!ASSERT_OK(err, "userspace_pm_rm_subflow 100"))
137
+        goto close_accept;
138
+
139
+    send_byte(client_fd);
140
+    recv_byte(accept_fd);
141
+
142
+    addr = ipv6 ? (ipv4mapped ? "::ffff:"ADDR_3 : ADDR6_3) : ADDR_3;
143
+    err = userspace_pm_add_addr(token, addr, 200);
144
+    if (!ASSERT_OK(err, "userspace_pm_add_addr 200"))
145
+        goto close_accept;
146
+
147
+    send_byte(accept_fd);
148
+    recv_byte(client_fd);
149
+
150
+    err = userspace_pm_rm_addr(token, 200);
151
+    if (!ASSERT_OK(err, "userspace_pm_rm_addr 200"))
152
+        goto close_accept;
153
+
154
+    send_byte(client_fd);
155
+    recv_byte(accept_fd);
156
+
157
+close_accept:
158
+    close(accept_fd);
159
+close_client:
160
+    close(client_fd);
161
+close_server:
162
+    close(server_fd);
163
+}
164
+
165
+static void test_userspace_pm(void)
166
+{
167
+    struct netns_obj *netns;
168
+    int err;
169
+
170
+    netns = netns_new(NS_TEST, true);
171
+    if (!ASSERT_OK_PTR(netns, "netns_new"))
172
+        return;
173
+
174
+    err = userspace_pm_init(MPTCP_PM_TYPE_USERSPACE);
175
+    if (!ASSERT_OK(err, "userspace_pm_init: userspace pm"))
176
+        goto fail;
177
+
178
+    run_userspace_pm(IPV4MAPPED);
179
+
180
+    userspace_pm_cleanup();
181
+fail:
182
+    netns_free(netns);
183
+}
184
+
185
static struct netns_obj *sched_init(char *flags, char *sched)
186
{
28
{
187
    struct netns_obj *netns;
29
    struct mptcp_pm_add_entry *add_entry = NULL;
188
@@ -XXX,XX +XXX,XX @@ void test_mptcp(void)
30
    struct sock *sk = (struct sock *)msk;
189
        test_iters_subflow();
31
@@ -XXX,XX +XXX,XX @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
190
    if (test__start_subtest("iters_address"))
32
    add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
191
        test_iters_address();
33
192
+    if (test__start_subtest("userspace_pm"))
34
    if (add_entry) {
193
+        test_userspace_pm();
35
-        if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk)))
194
    if (test__start_subtest("default"))
36
+        if (WARN_ON_ONCE(!reissue))
195
        test_default();
37
            return false;
196
    if (test__start_subtest("first"))
38
39
        sk_reset_timer(sk, &add_entry->add_timer,
40
@@ -XXX,XX +XXX,XX @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
41
        /* If the alloc fails, we are on memory pressure, not worth
42
         * continuing, and trying to create subflows.
43
         */
44
-        if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
45
+        if (!mptcp_pm_alloc_anno_list(msk, &local.addr, false))
46
            return;
47
48
        __clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
49
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/net/mptcp/pm_userspace.c
52
+++ b/net/mptcp/pm_userspace.c
53
@@ -XXX,XX +XXX,XX @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
54
    lock_sock(sk);
55
    spin_lock_bh(&msk->pm.lock);
56
57
-    if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
58
+    if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr, true)) {
59
        msk->pm.add_addr_signaled++;
60
        mptcp_pm_announce_addr(msk, &addr_val.addr, false);
61
        mptcp_pm_nl_addr_send_ack(msk);
62
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
63
index XXXXXXX..XXXXXXX 100644
64
--- a/net/mptcp/protocol.h
65
+++ b/net/mptcp/protocol.h
66
@@ -XXX,XX +XXX,XX @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
67
                 struct mptcp_addr_info *rem,
68
                 u8 bkup);
69
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
70
-             const struct mptcp_addr_info *addr);
71
+             const struct mptcp_addr_info *addr,
72
+             bool reissue);
73
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
74
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
75
struct mptcp_pm_add_entry *
197
--
76
--
198
2.43.0
77
2.43.0
diff view generated by jsdifflib
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
This patch adds an mptcp bpf userspace pm example program, implements
3
There're duplicate operations in mptcp_nl_remove_subflow_and_signal_addr()
4
address_announce, address_remove, subflow_create, subflow_destroy,
4
and mptcp_nl_remove_id_zero_address(), both of which traverse all mptcp
5
get_local_id, is_backup, and set_flags interfaces using almost the
5
sockets in the net namespace. This patch drops the traversal operation in
6
same logic as the userspace pm in kernel.
6
the latter and reuse the traversal loop of the former to do the removal of
7
id zero address.
8
9
An additional benefit is that the check for mptcp_pm_is_userspace() in
10
mptcp_nl_remove_id_zero_address() is dropped, which reduces the path
11
manager's reliance on mptcp_pm_is_userspace() and mptcp_pm_is_kernel()
12
helpers.
7
13
8
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
14
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
9
---
15
---
10
.../testing/selftests/bpf/prog_tests/mptcp.c | 51 ++++
16
net/mptcp/pm_netlink.c | 77 ++++++++++++++++++++----------------------
11
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 75 +++++
17
1 file changed, 37 insertions(+), 40 deletions(-)
12
.../bpf/progs/mptcp_bpf_userspace_pm.c | 275 ++++++++++++++++++
13
3 files changed, 401 insertions(+)
14
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
15
18
16
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
19
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
17
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
18
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
21
--- a/net/mptcp/pm_netlink.c
19
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
22
+++ b/net/mptcp/pm_netlink.c
20
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id)
21
#include "mptcpify.skel.h"
24
        msk->pm.local_addr_used--;
22
#include "mptcp_subflow.skel.h"
23
#include "mptcp_bpf_iters.skel.h"
24
+#include "mptcp_bpf_userspace_pm.skel.h"
25
#include "mptcp_bpf_first.skel.h"
26
#include "mptcp_bpf_bkup.skel.h"
27
#include "mptcp_bpf_rr.skel.h"
28
@@ -XXX,XX +XXX,XX @@
29
enum mptcp_pm_type {
30
    MPTCP_PM_TYPE_KERNEL = 0,
31
    MPTCP_PM_TYPE_USERSPACE,
32
+    MPTCP_PM_TYPE_BPF_USERSPACE,
33
34
    __MPTCP_PM_TYPE_NR,
35
    __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
36
@@ -XXX,XX +XXX,XX @@ static void test_userspace_pm(void)
37
    netns_free(netns);
38
}
25
}
39
26
40
+static void test_bpf_path_manager(void)
27
+static void mptcp_nl_remove_id_zero_address(struct mptcp_sock *msk,
28
+                     const struct mptcp_addr_info *addr)
41
+{
29
+{
42
+    struct mptcp_bpf_userspace_pm *skel;
30
+    struct mptcp_rm_list list = { .nr = 0 };
43
+    struct netns_obj *netns;
31
+    struct mptcp_addr_info msk_local;
44
+    int err;
45
+
32
+
46
+    skel = mptcp_bpf_userspace_pm__open();
33
+    if (list_empty(&msk->conn_list))
47
+    if (!ASSERT_OK_PTR(skel, "open: userspace_pm"))
48
+        return;
34
+        return;
49
+
35
+
50
+    err = bpf_program__set_flags(skel->progs.mptcp_userspace_pm_address_announced,
36
+    mptcp_local_address((struct sock_common *)msk, &msk_local);
51
+                 BPF_F_SLEEPABLE);
37
+    if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
52
+    err = err ?: bpf_program__set_flags(skel->progs.mptcp_userspace_pm_address_removed,
38
+        return;
53
+                     BPF_F_SLEEPABLE);
54
+    err = err ?: bpf_program__set_flags(skel->progs.mptcp_userspace_pm_subflow_established,
55
+                     BPF_F_SLEEPABLE);
56
+    err = err ?: bpf_program__set_flags(skel->progs.mptcp_userspace_pm_subflow_closed,
57
+                     BPF_F_SLEEPABLE);
58
+    err = err ?: bpf_program__set_flags(skel->progs.mptcp_userspace_pm_set_priority,
59
+                     BPF_F_SLEEPABLE);
60
+    if (!ASSERT_OK(err, "set sleepable flags"))
61
+        goto skel_destroy;
62
+
39
+
63
+    if (!ASSERT_OK(mptcp_bpf_userspace_pm__load(skel), "load: userspace_pm"))
40
+    list.ids[list.nr++] = 0;
64
+        goto skel_destroy;
65
+
41
+
66
+    err = mptcp_bpf_userspace_pm__attach(skel);
42
+    spin_lock_bh(&msk->pm.lock);
67
+    if (!ASSERT_OK(err, "attach: userspace_pm"))
43
+    mptcp_pm_remove_addr(msk, &list);
68
+        goto skel_destroy;
44
+    mptcp_pm_nl_rm_subflow_received(msk, &list);
69
+
45
+    __mark_subflow_endp_available(msk, 0);
70
+    netns = netns_new(NS_TEST, true);
46
+    spin_unlock_bh(&msk->pm.lock);
71
+    if (!ASSERT_OK_PTR(netns, "netns_new"))
72
+        goto skel_destroy;
73
+
74
+    err = userspace_pm_init(MPTCP_PM_TYPE_BPF_USERSPACE);
75
+    if (!ASSERT_OK(err, "userspace_pm_init: bpf pm"))
76
+        goto close_netns;
77
+
78
+    run_userspace_pm(skel->kconfig->CONFIG_MPTCP_IPV6 ? IPV6 : IPV4);
79
+
80
+    userspace_pm_cleanup();
81
+close_netns:
82
+    netns_free(netns);
83
+skel_destroy:
84
+    mptcp_bpf_userspace_pm__destroy(skel);
85
+}
47
+}
86
+
48
+
87
static struct netns_obj *sched_init(char *flags, char *sched)
49
static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
50
                         const struct mptcp_pm_addr_entry *entry)
88
{
51
{
89
    struct netns_obj *netns;
52
@@ -XXX,XX +XXX,XX @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
90
@@ -XXX,XX +XXX,XX @@ void test_mptcp(void)
53
            goto next;
91
        test_iters_address();
54
92
    if (test__start_subtest("userspace_pm"))
55
        lock_sock(sk);
93
        test_userspace_pm();
56
+        if (entry->addr.id == 0) {
94
+    if (test__start_subtest("bpf_path_manager"))
57
+            mptcp_nl_remove_id_zero_address(msk, &entry->addr);
95
+        test_bpf_path_manager();
58
+            goto out;
96
    if (test__start_subtest("default"))
97
        test_default();
98
    if (test__start_subtest("first"))
99
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
100
index XXXXXXX..XXXXXXX 100644
101
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
102
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
103
@@ -XXX,XX +XXX,XX @@ static inline int list_is_head(const struct list_head *list,
104
#define    ENOMEM        12    /* Out of Memory */
105
#define    EINVAL        22    /* Invalid argument */
106
107
+/* mptcp helpers from include/net/mptcp.h */
108
+#define U8_MAX        ((u8)~0U)
109
+
110
+/* max value of mptcp_addr_info.id */
111
+#define MPTCP_PM_MAX_ADDR_ID        U8_MAX
112
+
113
+/* mptcp macros from include/uapi/linux/mptcp.h */
114
+#define MPTCP_PM_ADDR_FLAG_SIGNAL            (1 << 0)
115
+#define MPTCP_PM_ADDR_FLAG_SUBFLOW            (1 << 1)
116
+#define MPTCP_PM_ADDR_FLAG_BACKUP            (1 << 2)
117
+#define MPTCP_PM_ADDR_FLAG_FULLMESH            (1 << 3)
118
+#define MPTCP_PM_ADDR_FLAG_IMPLICIT            (1 << 4)
119
+
120
+/* address families macros from include/linux/socket.h */
121
+#define AF_UNSPEC    0
122
+#define AF_INET        2
123
+#define AF_INET6    10
124
+
125
+/* shutdown macros from include/net/sock.h */
126
+#define RCV_SHUTDOWN    1
127
+#define SEND_SHUTDOWN    2
128
+
129
+/* GFP macros from include/linux/gfp_types.h */
130
+#define __AC(X,Y)    (X##Y)
131
+#define _AC(X,Y)    __AC(X,Y)
132
+#define _UL(x)        (_AC(x, UL))
133
+#define UL(x)        (_UL(x))
134
+#define BIT(nr)        (UL(1) << (nr))
135
+
136
+#define ___GFP_HIGH        BIT(___GFP_HIGH_BIT)
137
+#define __GFP_HIGH        ((gfp_t)___GFP_HIGH)
138
+#define ___GFP_KSWAPD_RECLAIM    BIT(___GFP_KSWAPD_RECLAIM_BIT)
139
+#define __GFP_KSWAPD_RECLAIM    ((gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
140
+#define GFP_ATOMIC        (__GFP_HIGH|__GFP_KSWAPD_RECLAIM)
141
+
142
static __always_inline struct sock *
143
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
144
{
145
@@ -XXX,XX +XXX,XX @@ extern void bpf_spin_unlock_bh(spinlock_t *lock) __ksym;
146
147
extern bool bpf_ipv4_is_private_10(__be32 addr) __ksym;
148
149
+extern struct mptcp_pm_addr_entry *
150
+bpf_sock_kmalloc_entry(struct sock *sk, int size, gfp_t priority) __ksym;
151
+extern void
152
+bpf_sock_kfree_entry(struct sock *sk, struct mptcp_pm_addr_entry *entry,
153
+         int size) __ksym;
154
+
155
+extern bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
156
+                 const struct mptcp_addr_info *addr) __ksym;
157
+extern int mptcp_pm_announce_addr(struct mptcp_sock *msk,
158
+                 const struct mptcp_addr_info *addr,
159
+                 bool echo) __ksym;
160
+extern void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) __ksym;
161
+
162
+extern void bpf_bitmap_zero(unsigned long *dst, unsigned int nbits) __ksym;
163
+extern void bpf_set_bit(unsigned long nr, unsigned long *addr) __ksym;
164
+extern u8 bpf_find_next_zero_bit(const unsigned long *addr,
165
+                 unsigned long size, unsigned long offset) __ksym;
166
+
167
+extern int mptcp_pm_remove_addr(struct mptcp_sock *msk,
168
+                const struct mptcp_rm_list *rm_list) __ksym;
169
+extern void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk,
170
+                 struct mptcp_pm_addr_entry *entry) __ksym;
171
+
172
+extern int bpf_mptcp_subflow_connect(struct sock *sk,
173
+                 const struct mptcp_pm_addr_entry *entry,
174
+                 const struct mptcp_addr_info *remote) __ksym;
175
+
176
+extern void
177
+mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) __ksym;
178
+extern void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
179
+             struct mptcp_subflow_context *subflow) __ksym;
180
+extern struct net *bpf_sock_net(const struct sock *sk) __ksym;
181
+extern void BPF_MPTCP_INC_STATS(struct net *net,
182
+                enum linux_mptcp_mib_field field) __ksym;
183
+
184
+extern int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
185
+                    struct mptcp_addr_info *addr,
186
+                    struct mptcp_addr_info *rem,
187
+                    u8 bkup) __ksym;
188
+
189
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
190
                    bool scheduled) __ksym;
191
192
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
193
new file mode 100644
194
index XXXXXXX..XXXXXXX
195
--- /dev/null
196
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c
197
@@ -XXX,XX +XXX,XX @@
198
+// SPDX-License-Identifier: GPL-2.0
199
+/* Copyright (c) 2025, Kylin Software */
200
+
201
+#include "mptcp_bpf.h"
202
+
203
+char _license[] SEC("license") = "GPL";
204
+
205
+extern bool CONFIG_MPTCP_IPV6 __kconfig __weak;
206
+
207
+extern void bpf_list_add_tail_rcu(struct list_head *new,
208
+                 struct list_head *head) __ksym;
209
+extern void bpf_list_del_rcu(struct list_head *entry) __ksym;
210
+
211
+SEC("struct_ops")
212
+void BPF_PROG(mptcp_userspace_pm_init, struct mptcp_sock *msk)
213
+{
214
+    bpf_printk("BPF userspace PM (%s)",
215
+         CONFIG_MPTCP_IPV6 ? "IPv6" : "IPv4");
216
+}
217
+
218
+SEC("struct_ops")
219
+void BPF_PROG(mptcp_userspace_pm_release, struct mptcp_sock *msk)
220
+{
221
+}
222
+
223
+static struct mptcp_pm_addr_entry *
224
+mptcp_userspace_pm_lookup_addr(struct mptcp_sock *msk,
225
+             const struct mptcp_addr_info *addr)
226
+{
227
+    struct mptcp_pm_addr_entry *entry;
228
+
229
+    bpf_for_each(mptcp_userspace_pm_addr, entry, (struct sock *)msk) {
230
+        if (mptcp_addresses_equal(&entry->addr, addr, false))
231
+            return entry;
232
+    }
233
+    return NULL;
234
+}
235
+
236
+static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
237
+                         struct mptcp_pm_addr_entry *entry,
238
+                         bool needs_id)
239
+{
240
+    struct sock *sk = (struct sock *)msk;
241
+    unsigned long id_bitmap[4] = { 0 };
242
+    struct mptcp_pm_addr_entry *e;
243
+    bool addr_match = false;
244
+    bool id_match = false;
245
+    int ret = -EINVAL;
246
+
247
+    bpf_bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
248
+
249
+    bpf_spin_lock_bh(&msk->pm.lock);
250
+    bpf_for_each(mptcp_userspace_pm_addr, e, sk) {
251
+        addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
252
+        if (addr_match && entry->addr.id == 0 && needs_id)
253
+            entry->addr.id = e->addr.id;
254
+        id_match = (e->addr.id == entry->addr.id);
255
+        if (addr_match || id_match)
256
+            break;
257
+        bpf_set_bit(e->addr.id, id_bitmap);
258
+    }
259
+
260
+    if (!addr_match && !id_match) {
261
+        /* Memory for the entry is allocated from the
262
+         * sock option buffer.
263
+         */
264
+        e = bpf_sock_kmalloc_entry(sk, sizeof(*e), GFP_ATOMIC);
265
+        if (!e) {
266
+            ret = -ENOMEM;
267
+            goto append_err;
268
+        }
59
+        }
269
+
60
+
270
+        mptcp_pm_copy_entry(e, entry);
61
        remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr);
271
+        if (!e->addr.id && needs_id)
62
        mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
272
+            e->addr.id = bpf_find_next_zero_bit(id_bitmap,
63
                     !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
273
+                             MPTCP_PM_MAX_ADDR_ID + 1,
64
@@ -XXX,XX +XXX,XX @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
274
+                             1);
65
275
+        bpf_list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list);
66
        if (msk->mpc_endpoint_id == entry->addr.id)
276
+        msk->pm.local_addr_used++;
67
            msk->mpc_endpoint_id = 0;
277
+        ret = e->addr.id;
68
-        release_sock(sk);
278
+    } else if (addr_match && id_match) {
69
-
279
+        ret = entry->addr.id;
70
-next:
71
-        sock_put(sk);
72
-        cond_resched();
73
-    }
74
-
75
-    return 0;
76
-}
77
-
78
-static int mptcp_nl_remove_id_zero_address(struct net *net,
79
-                     struct mptcp_addr_info *addr)
80
-{
81
-    struct mptcp_rm_list list = { .nr = 0 };
82
-    long s_slot = 0, s_num = 0;
83
-    struct mptcp_sock *msk;
84
-
85
-    list.ids[list.nr++] = 0;
86
-
87
-    while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
88
-        struct sock *sk = (struct sock *)msk;
89
-        struct mptcp_addr_info msk_local;
90
-
91
-        if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
92
-            goto next;
93
-
94
-        mptcp_local_address((struct sock_common *)msk, &msk_local);
95
-        if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
96
-            goto next;
97
-
98
-        lock_sock(sk);
99
-        spin_lock_bh(&msk->pm.lock);
100
-        mptcp_pm_remove_addr(msk, &list);
101
-        mptcp_pm_nl_rm_subflow_received(msk, &list);
102
-        __mark_subflow_endp_available(msk, 0);
103
-        spin_unlock_bh(&msk->pm.lock);
104
+out:
105
        release_sock(sk);
106
107
next:
108
@@ -XXX,XX +XXX,XX @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
109
     * id addresses. Additionally zero id is not accounted for in id_bitmap.
110
     * Let's use an 'mptcp_rm_list' instead of the common remove code.
111
     */
112
-    if (addr.addr.id == 0)
113
-        return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr);
114
+    if (addr.addr.id == 0) {
115
+        entry = &addr;
116
+        goto del_addr;
280
+    }
117
+    }
281
+
118
282
+append_err:
119
    spin_lock_bh(&pernet->lock);
283
+    bpf_spin_unlock_bh(&msk->pm.lock);
120
    entry = __lookup_addr_by_id(pernet, addr.addr.id);
284
+    return ret;
121
@@ -XXX,XX +XXX,XX @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
285
+}
122
    __clear_bit(entry->addr.id, pernet->id_bitmap);
286
+
123
    spin_unlock_bh(&pernet->lock);
287
+SEC("struct_ops")
124
288
+int BPF_PROG(mptcp_userspace_pm_address_announced, struct mptcp_sock *msk,
125
+del_addr:
289
+     struct mptcp_pm_addr_entry *local)
126
    mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
290
+{
127
-    synchronize_rcu();
291
+    int err;
128
-    __mptcp_pm_release_addr_entry(entry);
292
+
129
+    if (entry->addr.id) {
293
+    err = mptcp_userspace_pm_append_new_local_addr(msk, local, false);
130
+        synchronize_rcu();
294
+    if (err < 0)
131
+        __mptcp_pm_release_addr_entry(entry);
295
+        return err;
296
+
297
+    bpf_spin_lock_bh(&msk->pm.lock);
298
+
299
+    if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
300
+        msk->pm.add_addr_signaled++;
301
+        mptcp_pm_announce_addr(msk, &local->addr, false);
302
+        mptcp_pm_nl_addr_send_ack(msk);
303
+    }
132
+    }
304
+
133
305
+    bpf_spin_unlock_bh(&msk->pm.lock);
134
    return ret;
306
+
135
}
307
+    return 0;
308
+}
309
+
310
+static struct mptcp_pm_addr_entry *
311
+mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
312
+{
313
+    struct mptcp_pm_addr_entry *entry;
314
+
315
+    bpf_for_each(mptcp_userspace_pm_addr, entry, (struct sock *)msk) {
316
+        if (entry->addr.id == id)
317
+            return entry;
318
+    }
319
+    return NULL;
320
+}
321
+
322
+SEC("struct_ops")
323
+int BPF_PROG(mptcp_userspace_pm_address_removed, struct mptcp_sock *msk, u8 id)
324
+{
325
+    struct mptcp_pm_addr_entry *entry;
326
+
327
+    bpf_spin_lock_bh(&msk->pm.lock);
328
+    entry = mptcp_userspace_pm_lookup_addr_by_id(msk, id);
329
+    if (!entry) {
330
+        bpf_spin_unlock_bh(&msk->pm.lock);
331
+        return -EINVAL;
332
+    }
333
+
334
+    bpf_list_del_rcu(&entry->list);
335
+    bpf_spin_unlock_bh(&msk->pm.lock);
336
+
337
+    mptcp_pm_remove_addr_entry(msk, entry);
338
+
339
+    bpf_sock_kfree_entry((struct sock *)msk, entry, sizeof(*entry));
340
+
341
+    return 0;
342
+}
343
+
344
+static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
345
+                        struct mptcp_pm_addr_entry *addr)
346
+{
347
+    struct sock *sk = (struct sock *)msk;
348
+    struct mptcp_pm_addr_entry *entry;
349
+
350
+    entry = mptcp_userspace_pm_lookup_addr(msk, &addr->addr);
351
+    if (!entry)
352
+        return -EINVAL;
353
+
354
+    bpf_list_del_rcu(&entry->list);
355
+    bpf_sock_kfree_entry(sk, entry, sizeof(*entry));
356
+    msk->pm.local_addr_used--;
357
+    return 0;
358
+}
359
+
360
+SEC("struct_ops")
361
+int BPF_PROG(mptcp_userspace_pm_subflow_established, struct mptcp_sock *msk,
362
+     struct mptcp_pm_addr_entry *local, struct mptcp_addr_info *remote)
363
+{
364
+    struct sock *sk = (struct sock *)msk;
365
+    int err;
366
+
367
+    err = mptcp_userspace_pm_append_new_local_addr(msk, local, false);
368
+    if (err < 0)
369
+        return err;
370
+
371
+    err = bpf_mptcp_subflow_connect(sk, local, remote);
372
+    bpf_spin_lock_bh(&msk->pm.lock);
373
+    if (err)
374
+        mptcp_userspace_pm_delete_local_addr(msk, local);
375
+    else
376
+        msk->pm.subflows++;
377
+    bpf_spin_unlock_bh(&msk->pm.lock);
378
+
379
+    return err;
380
+}
381
+
382
+SEC("struct_ops")
383
+int BPF_PROG(mptcp_userspace_pm_subflow_closed, struct mptcp_sock *msk,
384
+     struct mptcp_pm_addr_entry *local, struct mptcp_addr_info *remote)
385
+{
386
+    struct sock *ssk, *sk = (struct sock *)msk;
387
+    struct mptcp_subflow_context *subflow;
388
+
389
+    ssk = mptcp_pm_find_ssk(msk, &local->addr, remote);
390
+    if (!ssk)
391
+        return -ESRCH;
392
+
393
+    subflow = bpf_mptcp_subflow_ctx(ssk);
394
+    if (!subflow)
395
+        return -EINVAL;
396
+
397
+    bpf_spin_lock_bh(&msk->pm.lock);
398
+    mptcp_userspace_pm_delete_local_addr(msk, local);
399
+    bpf_spin_unlock_bh(&msk->pm.lock);
400
+    mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
401
+    mptcp_close_ssk(sk, ssk, subflow);
402
+    BPF_MPTCP_INC_STATS(bpf_sock_net(sk), MPTCP_MIB_RMSUBFLOW);
403
+
404
+    return 0;
405
+}
406
+
407
+SEC("struct_ops")
408
+int BPF_PROG(mptcp_userspace_pm_get_local_id, struct mptcp_sock *msk,
409
+     struct mptcp_pm_addr_entry *skc)
410
+{
411
+    struct mptcp_pm_addr_entry *entry;
412
+
413
+    bpf_spin_lock_bh(&msk->pm.lock);
414
+    entry = mptcp_userspace_pm_lookup_addr(msk, &skc->addr);
415
+    bpf_spin_unlock_bh(&msk->pm.lock);
416
+    if (entry)
417
+        return entry->addr.id;
418
+
419
+    return mptcp_userspace_pm_append_new_local_addr(msk, skc, true);
420
+}
421
+
422
+SEC("struct_ops")
423
+bool BPF_PROG(mptcp_userspace_pm_get_priority, struct mptcp_sock *msk,
424
+     struct mptcp_addr_info *skc)
425
+{
426
+    struct mptcp_pm_addr_entry *entry;
427
+    bool backup;
428
+
429
+    bpf_spin_lock_bh(&msk->pm.lock);
430
+    entry = mptcp_userspace_pm_lookup_addr(msk, skc);
431
+    backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
432
+    bpf_spin_unlock_bh(&msk->pm.lock);
433
+
434
+    return backup;
435
+}
436
+
437
+SEC("struct_ops")
438
+int BPF_PROG(mptcp_userspace_pm_set_priority, struct mptcp_sock *msk,
439
+     struct mptcp_pm_addr_entry *local, struct mptcp_addr_info *remote)
440
+{
441
+    struct mptcp_pm_addr_entry *entry;
442
+    u8 bkup = 0;
443
+
444
+    if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP)
445
+        bkup = 1;
446
+
447
+    bpf_spin_lock_bh(&msk->pm.lock);
448
+    entry = mptcp_userspace_pm_lookup_addr(msk, &local->addr);
449
+    if (entry) {
450
+        if (bkup)
451
+            entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
452
+        else
453
+            entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
454
+    }
455
+    bpf_spin_unlock_bh(&msk->pm.lock);
456
+
457
+    return mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, remote, bkup);
458
+}
459
+
460
+SEC(".struct_ops.link")
461
+struct mptcp_pm_ops userspace_pm = {
462
+    .address_announced    = (void *)mptcp_userspace_pm_address_announced,
463
+    .address_removed    = (void *)mptcp_userspace_pm_address_removed,
464
+    .subflow_established    = (void *)mptcp_userspace_pm_subflow_established,
465
+    .subflow_closed        = (void *)mptcp_userspace_pm_subflow_closed,
466
+    .get_local_id        = (void *)mptcp_userspace_pm_get_local_id,
467
+    .get_priority        = (void *)mptcp_userspace_pm_get_priority,
468
+    .set_priority        = (void *)mptcp_userspace_pm_set_priority,
469
+    .init            = (void *)mptcp_userspace_pm_init,
470
+    .release        = (void *)mptcp_userspace_pm_release,
471
+    .type            = MPTCP_PM_TYPE_BPF_USERSPACE,
472
+};
473
--
136
--
474
2.43.0
137
2.43.0
diff view generated by jsdifflib
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
This patch exports mptcp path manager helpers into BPF, adds these
3
The remote address of set_flags() interface is useful for userspace PM,
4
kfunc names into struct_ops kfunc_set and register this set with
4
but unused in in-kernel PM.
5
BPF_PROG_TYPE_STRUCT_OPS type.
5
6
But an additional "changed" parameter needs to be passed to set_flags() of
7
in-kernel PM. One option is to add a "u8 changed" parameter to set_flags()
8
interface:
9
10
    set_flags(struct mptcp_pm_addr_entry *local,
11
         struct mptcp_addr_info *remote,
12
         u8 changed)
13
14
A better option is to add a struct mptcp_pm_addr_entry "remote" parameter
15
for set_flags(), so that "remote->addr" can be used for userspace PM, and
16
"remote->flags" can be used for in-kernel PM to replace the additional
17
"changed" parameter.
6
18
7
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
19
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
8
---
20
---
9
net/mptcp/bpf.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++
21
net/mptcp/pm.c | 20 ++++++++++++++++----
10
1 file changed, 96 insertions(+)
22
net/mptcp/pm_netlink.c | 1 +
23
net/mptcp/pm_userspace.c | 21 +++------------------
24
net/mptcp/protocol.h | 2 ++
25
4 files changed, 22 insertions(+), 22 deletions(-)
11
26
12
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
27
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
13
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
14
--- a/net/mptcp/bpf.c
29
--- a/net/mptcp/pm.c
15
+++ b/net/mptcp/bpf.c
30
+++ b/net/mptcp/pm.c
16
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@ int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg,
17
#include <linux/btf_ids.h>
32
static int mptcp_pm_set_flags(struct genl_info *info)
18
#include <net/bpf_sk_storage.h>
33
{
19
#include "protocol.h"
34
    struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, };
20
+#include "mib.h"
35
-    struct nlattr *attr_loc;
21
36
+    struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, };
22
#ifdef CONFIG_BPF_JIT
37
+    struct nlattr *attr_loc, *attr_rem;
23
static struct bpf_struct_ops bpf_mptcp_pm_ops;
38
    int ret = -EINVAL;
24
@@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_ctx(const struct sock *sk)
39
25
    return NULL;
40
    if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR))
41
@@ -XXX,XX +XXX,XX @@ static int mptcp_pm_set_flags(struct genl_info *info)
42
    if (ret < 0)
43
        return ret;
44
45
-    if (info->attrs[MPTCP_PM_ATTR_TOKEN])
46
-        return mptcp_userspace_pm_set_flags(&loc, info);
47
-    return mptcp_pm_nl_set_flags(&loc, info);
48
+    if (info->attrs[MPTCP_PM_ATTR_TOKEN]) {
49
+        attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
50
+        ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem);
51
+        if (ret < 0)
52
+            return ret;
53
+
54
+        if (rem.addr.family == AF_UNSPEC) {
55
+            NL_SET_ERR_MSG_ATTR(info->extack, attr_rem,
56
+                     "invalid remote address family");
57
+            return -EINVAL;
58
+        }
59
+        return mptcp_userspace_pm_set_flags(&loc, &rem, info);
60
+    }
61
+    return mptcp_pm_nl_set_flags(&loc, &rem, info);
26
}
62
}
27
63
28
+__bpf_kfunc static struct sock *
64
int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info)
29
+bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
65
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
30
+{
66
index XXXXXXX..XXXXXXX 100644
31
+    if (!subflow)
67
--- a/net/mptcp/pm_netlink.c
32
+        return NULL;
68
+++ b/net/mptcp/pm_netlink.c
33
+
69
@@ -XXX,XX +XXX,XX @@ static void mptcp_nl_set_flags(struct net *net,
34
+    return mptcp_subflow_tcp_sock(subflow);
35
+}
36
+
37
__bpf_kfunc static int
38
bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
39
             struct mptcp_sock *msk)
40
@@ -XXX,XX +XXX,XX @@ __bpf_kfunc static bool bpf_ipv4_is_private_10(__be32 addr)
41
    return ipv4_is_private_10(addr);
42
}
70
}
43
71
44
+__bpf_kfunc static void bpf_list_add_tail_rcu(struct list_head *new,
72
int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
45
+                     struct list_head *head)
73
+             struct mptcp_pm_addr_entry *remote,
46
+{
74
             struct genl_info *info)
47
+    list_add_tail_rcu(new, head);
48
+}
49
+
50
+__bpf_kfunc static void bpf_list_del_rcu(struct list_head *entry)
51
+{
52
+    list_del_rcu(entry);
53
+}
54
+
55
+__bpf_kfunc static struct mptcp_pm_addr_entry *
56
+bpf_sock_kmalloc_entry(struct sock *sk, int size, gfp_t priority)
57
+{
58
+    return sock_kmalloc(sk, size, priority);
59
+}
60
+
61
+__bpf_kfunc static void
62
+bpf_sock_kfree_entry(struct sock *sk, struct mptcp_pm_addr_entry *entry,
63
+         int size)
64
+{
65
+    sock_kfree_s(sk, entry, size);
66
+}
67
+
68
+__bpf_kfunc static void bpf_bitmap_zero(unsigned long *dst__ign, unsigned int nbits)
69
+{
70
+    bitmap_zero(dst__ign, nbits);
71
+}
72
+
73
+__bpf_kfunc static void bpf_set_bit(unsigned long nr, unsigned long *addr__ign)
74
+{
75
+    __set_bit(nr, addr__ign);
76
+}
77
+
78
+__bpf_kfunc static __u8 bpf_find_next_zero_bit(const unsigned long *addr__ign,
79
+                     unsigned long size, unsigned long offset)
80
+{
81
+    return find_next_zero_bit(addr__ign, size, offset);
82
+}
83
+
84
+__bpf_kfunc static int
85
+bpf_mptcp_subflow_connect(struct sock *sk,
86
+             const struct mptcp_pm_addr_entry *entry,
87
+             const struct mptcp_addr_info *remote)
88
+{
89
+    struct mptcp_pm_local local;
90
+
91
+    local.addr = entry->addr;
92
+    local.flags = entry->flags;
93
+    local.ifindex = entry->ifindex;
94
+
95
+    return __mptcp_subflow_connect(sk, &local, remote);
96
+}
97
+
98
+__bpf_kfunc static struct net *bpf_sock_net(const struct sock *sk)
99
+{
100
+    return sock_net(sk);
101
+}
102
+
103
+__bpf_kfunc static void BPF_MPTCP_INC_STATS(struct net *net,
104
+                     enum linux_mptcp_mib_field field)
105
+{
106
+    MPTCP_INC_STATS(net, field);
107
+}
108
+
109
__bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk)
110
{
75
{
111
    return tcp_rtx_queue_empty(sk);
76
    struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
112
@@ -XXX,XX +XXX,XX @@ __bpf_kfunc_end_defs();
77
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
113
78
index XXXXXXX..XXXXXXX 100644
114
BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids)
79
--- a/net/mptcp/pm_userspace.c
115
BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx, KF_RET_NULL)
80
+++ b/net/mptcp/pm_userspace.c
116
+BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock, KF_RET_NULL)
81
@@ -XXX,XX +XXX,XX @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
117
BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
82
}
118
BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL)
83
119
BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY)
84
int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local,
120
@@ -XXX,XX +XXX,XX @@ BTF_ID_FLAGS(func, bpf_iter_mptcp_userspace_pm_addr_destroy, KF_ITER_DESTROY)
85
+                 struct mptcp_pm_addr_entry *remote,
121
BTF_ID_FLAGS(func, bpf_spin_lock_bh)
86
                 struct genl_info *info)
122
BTF_ID_FLAGS(func, bpf_spin_unlock_bh)
87
{
123
BTF_ID_FLAGS(func, bpf_ipv4_is_private_10)
88
-    struct mptcp_addr_info rem = { .family = AF_UNSPEC, };
124
+BTF_ID_FLAGS(func, bpf_list_add_tail_rcu)
89
    struct mptcp_pm_addr_entry *entry;
125
+BTF_ID_FLAGS(func, bpf_list_del_rcu)
90
-    struct nlattr *attr, *attr_rem;
126
+BTF_ID_FLAGS(func, bpf_sock_kmalloc_entry)
91
    struct mptcp_sock *msk;
127
+BTF_ID_FLAGS(func, bpf_sock_kfree_entry)
92
+    struct nlattr *attr;
128
+BTF_ID_FLAGS(func, mptcp_pm_alloc_anno_list)
93
    int ret = -EINVAL;
129
+BTF_ID_FLAGS(func, mptcp_pm_announce_addr)
94
    struct sock *sk;
130
+BTF_ID_FLAGS(func, mptcp_pm_nl_addr_send_ack, KF_SLEEPABLE)
95
    u8 bkup = 0;
131
+BTF_ID_FLAGS(func, bpf_bitmap_zero)
96
132
+BTF_ID_FLAGS(func, bpf_set_bit)
97
-    if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE))
133
+BTF_ID_FLAGS(func, bpf_find_next_zero_bit)
98
-        return ret;
134
+BTF_ID_FLAGS(func, mptcp_pm_remove_addr)
99
-
135
+BTF_ID_FLAGS(func, mptcp_pm_remove_addr_entry, KF_SLEEPABLE)
100
    msk = mptcp_userspace_pm_get_sock(info);
136
+BTF_ID_FLAGS(func, bpf_mptcp_subflow_connect, KF_SLEEPABLE)
101
    if (!msk)
137
+BTF_ID_FLAGS(func, mptcp_subflow_shutdown, KF_SLEEPABLE)
102
        return ret;
138
+BTF_ID_FLAGS(func, mptcp_close_ssk, KF_SLEEPABLE)
103
@@ -XXX,XX +XXX,XX @@ int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local,
139
+BTF_ID_FLAGS(func, bpf_sock_net)
104
        goto set_flags_err;
140
+BTF_ID_FLAGS(func, BPF_MPTCP_INC_STATS)
105
    }
141
+BTF_ID_FLAGS(func, mptcp_pm_nl_mp_prio_send_ack, KF_SLEEPABLE)
106
142
BTF_ID_FLAGS(func, bpf_mptcp_sock_acquire, KF_ACQUIRE | KF_RET_NULL)
107
-    attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
143
BTF_ID_FLAGS(func, bpf_mptcp_sock_release, KF_RELEASE)
108
-    ret = mptcp_pm_parse_addr(attr_rem, info, &rem);
144
BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids)
109
-    if (ret < 0)
145
@@ -XXX,XX +XXX,XX @@ static int __init bpf_mptcp_kfunc_init(void)
110
-        goto set_flags_err;
146
    ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
111
-
147
    ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCKOPT,
112
-    if (rem.family == AF_UNSPEC) {
148
                     &bpf_mptcp_common_kfunc_set);
113
-        NL_SET_ERR_MSG_ATTR(info->extack, attr_rem,
149
+    ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
114
-                 "invalid remote address family");
150
+                     &bpf_mptcp_common_kfunc_set);
115
-        ret = -EINVAL;
151
    ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
116
-        goto set_flags_err;
152
                     &bpf_mptcp_sched_kfunc_set);
117
-    }
153
#ifdef CONFIG_BPF_JIT
118
-
119
    if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP)
120
        bkup = 1;
121
122
@@ -XXX,XX +XXX,XX @@ int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local,
123
    spin_unlock_bh(&msk->pm.lock);
124
125
    lock_sock(sk);
126
-    ret = mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, &rem, bkup);
127
+    ret = mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, &remote->addr, bkup);
128
    release_sock(sk);
129
130
    /* mptcp_pm_nl_mp_prio_send_ack() only fails in one case */
131
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/net/mptcp/protocol.h
134
+++ b/net/mptcp/protocol.h
135
@@ -XXX,XX +XXX,XX @@ bool mptcp_lookup_subflow_by_saddr(const struct list_head *list,
136
bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,
137
                 const struct mptcp_addr_info *addr);
138
int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
139
+             struct mptcp_pm_addr_entry *remote,
140
             struct genl_info *info);
141
int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local,
142
+                 struct mptcp_pm_addr_entry *remote,
143
                 struct genl_info *info);
144
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
145
             const struct mptcp_addr_info *addr,
154
--
146
--
155
2.43.0
147
2.43.0
diff view generated by jsdifflib
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
This patch implements a new struct bpf_struct_ops for MPTCP BPF path
3
To drop the additional "changed" parameter of mptcp_nl_set_flags(),
4
manager: bpf_mptcp_pm_ops. Register and unregister the bpf path manager
4
store "entry->flags" to "remote->flags" before modifying it in
5
in .reg and .unreg.
5
mptcp_pm_nl_set_flags(), so that "changed" value can be obtained by
6
6
comparing "local->flags" and "remote->flags" in mptcp_nl_set_flags().
7
Add write access for some fields of struct mptcp_addr_info, struct
8
mptcp_pm_addr_entry and struct mptcp_sock in .btf_struct_access.
9
10
This MPTCP BPF path manager implementation is similar to BPF TCP CC. And
11
net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
12
7
13
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
8
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
14
---
9
---
15
net/mptcp/bpf.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++-
10
net/mptcp/pm_netlink.c | 8 ++++++--
16
1 file changed, 275 insertions(+), 1 deletion(-)
11
1 file changed, 6 insertions(+), 2 deletions(-)
17
12
18
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
13
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
19
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
20
--- a/net/mptcp/bpf.c
15
--- a/net/mptcp/pm_netlink.c
21
+++ b/net/mptcp/bpf.c
16
+++ b/net/mptcp/pm_netlink.c
22
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
23
#include "protocol.h"
18
24
19
static void mptcp_nl_set_flags(struct net *net,
25
#ifdef CONFIG_BPF_JIT
20
             struct mptcp_pm_addr_entry *local,
26
+static struct bpf_struct_ops bpf_mptcp_pm_ops;
21
-             u8 changed)
27
+static u32 mptcp_sock_id,
22
+             struct mptcp_pm_addr_entry *remote)
28
+     mptcp_entry_id;
23
{
29
+
24
    u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW);
30
+/* MPTCP BPF path manager */
25
    u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
31
+
26
+    u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
32
+static const struct bpf_func_proto *
27
+             MPTCP_PM_ADDR_FLAG_FULLMESH;
33
+bpf_mptcp_pm_get_func_proto(enum bpf_func_id func_id,
28
    long s_slot = 0, s_num = 0;
34
+             const struct bpf_prog *prog)
29
    struct mptcp_sock *msk;
35
+{
30
36
+    switch (func_id) {
31
+    changed = (local->flags ^ remote->flags) & mask;
37
+    case BPF_FUNC_sk_storage_get:
32
    if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow)
38
+        return &bpf_sk_storage_get_proto;
33
        return;
39
+    case BPF_FUNC_sk_storage_delete:
34
40
+        return &bpf_sk_storage_delete_proto;
35
@@ -XXX,XX +XXX,XX @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
41
+    default:
36
        return -EINVAL;
42
+        return bpf_base_func_proto(func_id, prog);
37
    }
43
+    }
38
44
+}
39
+    remote->flags = entry->flags;
45
+
40
    changed = (local->flags ^ entry->flags) & mask;
46
+static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log,
41
    entry->flags = (entry->flags & ~mask) | (local->flags & mask);
47
+                     const struct bpf_reg_state *reg,
42
    *local = *entry;
48
+                     int off, int size)
43
    spin_unlock_bh(&pernet->lock);
49
+{
44
50
+    u32 id = reg->btf_id;
45
-    mptcp_nl_set_flags(net, local, changed);
51
+    size_t end;
46
+    mptcp_nl_set_flags(net, local, remote);
52
+
47
    return 0;
53
+    if (id == mptcp_sock_id) {
48
}
54
+        switch (off) {
55
+        case offsetof(struct mptcp_sock, pm.add_addr_signaled):
56
+            end = offsetofend(struct mptcp_sock, pm.add_addr_signaled);
57
+            break;
58
+        case offsetof(struct mptcp_sock, pm.local_addr_used):
59
+            end = offsetofend(struct mptcp_sock, pm.local_addr_used);
60
+            break;
61
+        case offsetof(struct mptcp_sock, pm.subflows):
62
+            end = offsetofend(struct mptcp_sock, pm.subflows);
63
+            break;
64
+        default:
65
+            bpf_log(log, "no write support to mptcp_sock at off %d\n",
66
+                off);
67
+            return -EACCES;
68
+        }
69
+    } else if (id == mptcp_entry_id) {
70
+        switch (off) {
71
+        case offsetof(struct mptcp_pm_addr_entry, addr.id):
72
+            end = offsetofend(struct mptcp_pm_addr_entry, addr.id);
73
+            break;
74
+        case offsetof(struct mptcp_pm_addr_entry, addr.family):
75
+            end = offsetofend(struct mptcp_pm_addr_entry, addr.family);
76
+            break;
77
+        case offsetof(struct mptcp_pm_addr_entry, addr.port):
78
+            end = offsetofend(struct mptcp_pm_addr_entry, addr.port);
79
+            break;
80
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
81
+        case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[0]):
82
+            end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[0]);
83
+            break;
84
+        case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[1]):
85
+            end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[1]);
86
+            break;
87
+        case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[2]):
88
+            end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[2]);
89
+            break;
90
+        case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[3]):
91
+            end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[3]);
92
+            break;
93
+#else
94
+        case offsetof(struct mptcp_pm_addr_entry, addr.addr.s_addr):
95
+            end = offsetofend(struct mptcp_pm_addr_entry, addr.addr.s_addr);
96
+            break;
97
+#endif
98
+        case offsetof(struct mptcp_pm_addr_entry, flags):
99
+            end = offsetofend(struct mptcp_pm_addr_entry, flags);
100
+            break;
101
+        case offsetof(struct mptcp_pm_addr_entry, ifindex):
102
+            end = offsetofend(struct mptcp_pm_addr_entry, ifindex);
103
+            break;
104
+        default:
105
+            bpf_log(log, "no write support to mptcp_pm_addr_entry at off %d\n",
106
+                off);
107
+            return -EACCES;
108
+        }
109
+    } else {
110
+        bpf_log(log, "only access to mptcp sock or addr or entry is supported\n");
111
+        return -EACCES;
112
+    }
113
+
114
+    if (off + size > end) {
115
+        bpf_log(log, "access beyond %s at off %u size %u ended at %zu",
116
+            id == mptcp_sock_id ? "mptcp_sock" :
117
+            (id == mptcp_entry_id ? "mptcp_pm_addr_entry" : "mptcp_addr_info"),
118
+            off, size, end);
119
+        return -EACCES;
120
+    }
121
+
122
+    return NOT_INIT;
123
+}
124
+
125
+static const struct bpf_verifier_ops bpf_mptcp_pm_verifier_ops = {
126
+    .get_func_proto        = bpf_mptcp_pm_get_func_proto,
127
+    .is_valid_access    = bpf_tracing_btf_ctx_access,
128
+    .btf_struct_access    = bpf_mptcp_pm_btf_struct_access,
129
+};
130
+
131
+static int bpf_mptcp_pm_reg(void *kdata, struct bpf_link *link)
132
+{
133
+    return mptcp_pm_register(kdata);
134
+}
135
+
136
+static void bpf_mptcp_pm_unreg(void *kdata, struct bpf_link *link)
137
+{
138
+    mptcp_pm_unregister(kdata);
139
+}
140
+
141
+static int bpf_mptcp_pm_check_member(const struct btf_type *t,
142
+                 const struct btf_member *member,
143
+                 const struct bpf_prog *prog)
144
+{
145
+    return 0;
146
+}
147
+
148
+static int bpf_mptcp_pm_init_member(const struct btf_type *t,
149
+                 const struct btf_member *member,
150
+                 void *kdata, const void *udata)
151
+{
152
+    const struct mptcp_pm_ops *upm;
153
+    struct mptcp_pm_ops *pm;
154
+    u32 moff;
155
+
156
+    upm = (const struct mptcp_pm_ops *)udata;
157
+    pm = (struct mptcp_pm_ops *)kdata;
158
+
159
+    moff = __btf_member_bit_offset(t, member) / 8;
160
+    switch (moff) {
161
+    case offsetof(struct mptcp_pm_ops, type):
162
+        pm->type = upm->type;
163
+        return 1;
164
+    }
165
+
166
+    return 0;
167
+}
168
+
169
+static int bpf_mptcp_pm_init(struct btf *btf)
170
+{
171
+    s32 type_id;
172
+
173
+    type_id = btf_find_by_name_kind(btf, "mptcp_sock",
174
+                    BTF_KIND_STRUCT);
175
+    if (type_id < 0)
176
+        return -EINVAL;
177
+    mptcp_sock_id = type_id;
178
+
179
+    type_id = btf_find_by_name_kind(btf, "mptcp_pm_addr_entry",
180
+                    BTF_KIND_STRUCT);
181
+    if (type_id < 0)
182
+        return -EINVAL;
183
+    mptcp_entry_id = type_id;
184
+
185
+    return 0;
186
+}
187
+
188
+static int bpf_mptcp_pm_validate(void *kdata)
189
+{
190
+    return mptcp_pm_validate(kdata);
191
+}
192
+
193
+static int __bpf_mptcp_pm_address_created(struct mptcp_sock *msk)
194
+{
195
+    return 0;
196
+}
197
+
198
+static int __bpf_mptcp_pm_address_established(struct mptcp_sock *msk)
199
+{
200
+    return 0;
201
+}
202
+
203
+static int __bpf_mptcp_pm_address_closed(struct mptcp_sock *msk)
204
+{
205
+    return 0;
206
+}
207
+
208
+static int __bpf_mptcp_pm_address_announced(struct mptcp_sock *msk,
209
+                     struct mptcp_pm_addr_entry *addr)
210
+{
211
+    return 0;
212
+}
213
+
214
+static int __bpf_mptcp_pm_address_removed(struct mptcp_sock *msk, u8 id)
215
+{
216
+    return 0;
217
+}
218
+
219
+static int __bpf_mptcp_pm_subflow_established(struct mptcp_sock *msk,
220
+                     struct mptcp_pm_addr_entry *local,
221
+                     struct mptcp_addr_info *remote)
222
+{
223
+    return 0;
224
+}
225
+
226
+static int __bpf_mptcp_pm_subflow_closed(struct mptcp_sock *msk,
227
+                     struct mptcp_pm_addr_entry *local,
228
+                     struct mptcp_addr_info *remote)
229
+{
230
+    return 0;
231
+}
232
+
233
+static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk,
234
+                 struct mptcp_pm_addr_entry *skc)
235
+{
236
+    return 0;
237
+}
238
+
239
+static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk,
240
+                    struct mptcp_addr_info *skc)
241
+{
242
+    return 0;
243
+}
244
+
245
+static int __bpf_mptcp_pm_set_priority(struct mptcp_sock *msk,
246
+                 struct mptcp_pm_addr_entry *local,
247
+                 struct mptcp_addr_info *remote)
248
+{
249
+    return 0;
250
+}
251
+
252
+static int __bpf_mptcp_pm_address_listener_created(struct mptcp_sock *msk)
253
+{
254
+    return 0;
255
+}
256
+
257
+static int __bpf_mptcp_pm_address_listener_closed(struct mptcp_sock *msk)
258
+{
259
+    return 0;
260
+}
261
+
262
+static void __bpf_mptcp_pm_init(struct mptcp_sock *msk)
263
+{
264
+}
265
+
266
+static void __bpf_mptcp_pm_release(struct mptcp_sock *msk)
267
+{
268
+}
269
+
270
+static struct mptcp_pm_ops __bpf_mptcp_pm_ops = {
271
+    .created        = __bpf_mptcp_pm_address_created,
272
+    .established        = __bpf_mptcp_pm_address_established,
273
+    .closed            = __bpf_mptcp_pm_address_closed,
274
+    .address_announced    = __bpf_mptcp_pm_address_announced,
275
+    .address_removed    = __bpf_mptcp_pm_address_removed,
276
+    .subflow_established    = __bpf_mptcp_pm_subflow_established,
277
+    .subflow_closed        = __bpf_mptcp_pm_subflow_closed,
278
+    .get_local_id        = __bpf_mptcp_pm_get_local_id,
279
+    .get_priority        = __bpf_mptcp_pm_get_priority,
280
+    .set_priority        = __bpf_mptcp_pm_set_priority,
281
+    .listener_created    = __bpf_mptcp_pm_address_listener_created,
282
+    .listener_closed    = __bpf_mptcp_pm_address_listener_closed,
283
+    .init            = __bpf_mptcp_pm_init,
284
+    .release        = __bpf_mptcp_pm_release,
285
+};
286
+
287
+static struct bpf_struct_ops bpf_mptcp_pm_ops = {
288
+    .verifier_ops    = &bpf_mptcp_pm_verifier_ops,
289
+    .reg        = bpf_mptcp_pm_reg,
290
+    .unreg        = bpf_mptcp_pm_unreg,
291
+    .check_member    = bpf_mptcp_pm_check_member,
292
+    .init_member    = bpf_mptcp_pm_init_member,
293
+    .init        = bpf_mptcp_pm_init,
294
+    .validate    = bpf_mptcp_pm_validate,
295
+    .name        = "mptcp_pm_ops",
296
+    .cfi_stubs    = &__bpf_mptcp_pm_ops,
297
+};
298
+
299
static struct bpf_struct_ops bpf_mptcp_sched_ops;
300
static const struct btf_type *mptcp_sock_type, *mptcp_subflow_type __read_mostly;
301
-static u32 mptcp_sock_id, mptcp_subflow_id;
302
+static u32 mptcp_subflow_id;
303
304
static const struct bpf_func_proto *
305
bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id,
306
@@ -XXX,XX +XXX,XX @@ static int __init bpf_mptcp_kfunc_init(void)
307
    ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
308
                     &bpf_mptcp_sched_kfunc_set);
309
#ifdef CONFIG_BPF_JIT
310
+    ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_pm_ops, mptcp_pm_ops);
311
    ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops);
312
#endif
313
49
314
--
50
--
315
2.43.0
51
2.43.0
diff view generated by jsdifflib