1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
Address Martin's suggestions for v2.
3
v5:
4
- check bpf_iter_task in bpf_iter_mptcp_subflow_new() as Mat suggested.
4
5
5
Geliang Tang (3):
6
v4:
7
- drop sock_owned_by_user_nocheck and spin_is_locked. According to
8
comments from Mat and Martin, in this set mptcp_subflow
9
bpf_iter is only used from a cg sockopt bpf prog, no need to add these
10
checks at this moment.
11
12
v3:
13
- patch 3, continue to use sock_owned_by_user_nocheck() and spin_is_locked()
14
checks instead of using msk_owned_by_me().
15
- patch 5, drop declaration of bpf_mptcp_subflow_tcp_sock. It's no longer
16
used.
17
- patch 5, update the comment for mptcp_subflow_tcp_sock(), which is a BPF
18
helper, not a kfunc.
19
20
The commit log of "bpf: Register mptcp common kfunc set" doesn't match the
21
code, please update it as:
22
23
'''
24
bpf: Register mptcp common kfunc set
25
26
MPTCP helper mptcp_subflow_ctx() is used to convert struct sock to
27
struct mptcp_subflow_context. It will be used in MPTCP BPF programs.
28
29
This patch defines corresponding wrapper of this helper, and put it
30
into the newly defined mptcp common kfunc set and register this set
31
with the flag BPF_PROG_TYPE_CGROUP_SOCKOPT to make it accessible to
32
the 'cgroup/getsockopt' type of BPF programs.
33
'''
34
35
v2:
36
- Drop bpf_skc_to_mptcp_sock
37
- Check the owner before assigning the msk as Mat suggested.
38
- Use bpf_core_cast() in mptcp_subflow bpf_iter subtest instead of
39
using bpf_skc_to_mptcp_sock().
40
41
Address Martin's suggestions for "Add mptcp_subflow bpf_iter support" v2.
42
43
Geliang Tang (5):
44
mptcp: add bpf_iter_task for mptcp_sock
45
Squash to "bpf: Extend bpf_skc_to_mptcp_sock to MPTCP sock"
6
Squash to "bpf: Add mptcp_subflow bpf_iter"
46
Squash to "bpf: Add mptcp_subflow bpf_iter"
7
Revert "bpf: Acquire and release mptcp socket"
47
Revert "bpf: Acquire and release mptcp socket"
8
Squash to "selftests/bpf: Add mptcp_subflow bpf_iter subtest"
48
Squash to "selftests/bpf: Add mptcp_subflow bpf_iter subtest"
9
49
10
net/mptcp/bpf.c | 34 +++++--------------
50
net/mptcp/bpf.c | 56 +++++++++----------
51
net/mptcp/protocol.c | 1 +
52
net/mptcp/protocol.h | 16 ++++++
11
.../testing/selftests/bpf/bpf_experimental.h | 2 +-
53
.../testing/selftests/bpf/bpf_experimental.h | 2 +-
12
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 --
54
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 5 --
13
.../selftests/bpf/progs/mptcp_bpf_iters.c | 6 +---
55
.../selftests/bpf/progs/mptcp_bpf_iters.c | 8 +--
14
4 files changed, 11 insertions(+), 34 deletions(-)
56
6 files changed, 47 insertions(+), 41 deletions(-)
15
57
16
--
58
--
17
2.43.0
59
2.43.0
diff view generated by jsdifflib
New patch
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
2
3
To make sure the mptcp_subflow bpf_iter is running in the
4
MPTCP context, this patch adds a simplified version of tracking
5
for it:
6
7
1. Add a 'struct task_struct *bpf_iter_task' field to struct
8
mptcp_sock.
9
10
2. Do a WRITE_ONCE(msk->bpf_iter_task, current) before calling
11
an MPTCP BPF hook, and WRITE_ONCE(msk->bpf_iter_task, NULL) after
12
the hook returns.
13
14
3. In bpf_iter_mptcp_subflow_new(), check
15
16
    "READ_ONCE(msk->bpf_iter_task) == current"
17
18
to confirm the correct task, return -EINVAL if it doesn't match.
19
20
Also creates helpers for setting, clearing and checking that value.
21
22
Suggested-by: Mat Martineau <martineau@kernel.org>
23
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
24
---
25
net/mptcp/protocol.c | 1 +
26
net/mptcp/protocol.h | 16 ++++++++++++++++
27
2 files changed, 17 insertions(+)
28
29
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/net/mptcp/protocol.c
32
+++ b/net/mptcp/protocol.c
33
@@ -XXX,XX +XXX,XX @@ static void __mptcp_init_sock(struct sock *sk)
34
    msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
35
36
    WRITE_ONCE(msk->first, NULL);
37
+    WRITE_ONCE(msk->bpf_iter_task, NULL);
38
    inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
39
    WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
40
    WRITE_ONCE(msk->allow_infinite_fallback, true);
41
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/net/mptcp/protocol.h
44
+++ b/net/mptcp/protocol.h
45
@@ -XXX,XX +XXX,XX @@ struct mptcp_sock {
46
    struct list_head conn_list;
47
    struct list_head rtx_queue;
48
    struct mptcp_data_frag *first_pending;
49
+    struct task_struct *bpf_iter_task;
50
    struct list_head join_list;
51
    struct sock    *first; /* The mptcp ops can safely dereference, using suitable
52
                 * ONCE annotation, the subflow outside the socket
53
@@ -XXX,XX +XXX,XX @@ mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_re
54
static inline void mptcp_join_cookie_init(void) {}
55
#endif
56
57
+static inline void mptcp_set_bpf_iter_task(struct mptcp_sock *msk)
58
+{
59
+    WRITE_ONCE(msk->bpf_iter_task, current);
60
+}
61
+
62
+static inline void mptcp_clear_bpf_iter_task(struct mptcp_sock *msk)
63
+{
64
+    WRITE_ONCE(msk->bpf_iter_task, NULL);
65
+}
66
+
67
+static inline struct task_struct *mptcp_get_bpf_iter_task(struct mptcp_sock *msk)
68
+{
69
+    return READ_ONCE(msk->bpf_iter_task);
70
+}
71
+
72
#endif /* __MPTCP_PROTOCOL_H */
73
--
74
2.43.0
diff view generated by jsdifflib
New patch
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
2
3
Set msk->bpf_iter_task in bpf_mptcp_sock_from_sock() to allow
4
mptcp_subflow bpf_iter to be used in cgroup/getsockopt,
5
otherwise, the selftest in this set fails.
6
7
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
8
---
9
net/mptcp/bpf.c | 16 ++++++++++++----
10
1 file changed, 12 insertions(+), 4 deletions(-)
11
12
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/net/mptcp/bpf.c
15
+++ b/net/mptcp/bpf.c
16
@@ -XXX,XX +XXX,XX @@ static struct bpf_struct_ops bpf_mptcp_sched_ops = {
17
18
struct mptcp_sock *bpf_mptcp_sock_from_sock(struct sock *sk)
19
{
20
+    struct mptcp_sock *msk;
21
+
22
    if (unlikely(!sk || !sk_fullsock(sk)))
23
        return NULL;
24
25
-    if (sk->sk_protocol == IPPROTO_MPTCP)
26
-        return mptcp_sk(sk);
27
+    if (sk->sk_protocol == IPPROTO_MPTCP) {
28
+        msk = mptcp_sk(sk);
29
+        mptcp_set_bpf_iter_task(msk);
30
+        return msk;
31
+    }
32
33
-    if (sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
34
-        return mptcp_sk(mptcp_subflow_ctx(sk)->conn);
35
+    if (sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) {
36
+        msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn);
37
+        mptcp_set_bpf_iter_task(msk);
38
+        return msk;
39
+    }
40
41
    return NULL;
42
}
43
--
44
2.43.0
diff view generated by jsdifflib
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
Drop the NULL check as Martin suggested.
3
Drop the NULL check for 'msk' as Martin suggested, add more checks
4
for 'sk'.
4
5
5
Use the "struct sock *sk" instead of "struct mptcp_sock *msk" as the
6
Use the "struct sock *sk" instead of "struct mptcp_sock *msk" as the
6
argument in the bpf_iter_mptcp_subflow_new as Martin suggested.
7
argument in the bpf_iter_mptcp_subflow_new as Martin suggested.
7
8
8
Use msk_owned_by_me().
9
v5:
10
- check bpf_iter_task in bpf_iter_mptcp_subflow_new()
11
12
v4:
13
- drop sock_owned_by_user_nocheck and spin_is_locked. According to
14
comments from Mat [2] and Martin [1], in this set mptcp_subflow
15
bpf_iter is only used from a cg sockopt bpf prog, no need to add these
16
checks at this moment.
17
18
[1]
19
https://lore.kernel.org/all/fdf0ddbe-e007-4a5f-bbdf-9a144e8fbe35@linux.dev/
20
[2]
21
https://patchwork.kernel.org/project/mptcp/patch/f6469225598beecbf0bda12a4c33fafa86c0ff15.1739787744.git.tanggeliang@kylinos.cn/
22
23
v3:
24
- continue to use sock_owned_by_user_nocheck and spin_is_locked
25
checks instead of using msk_owned_by_me().
26
27
v2:
28
- check the owner before assigning the msk as Mat suggested.
9
29
10
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
30
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
11
---
31
---
12
net/mptcp/bpf.c | 15 +++++++++------
32
net/mptcp/bpf.c | 21 +++++++++++++++------
13
1 file changed, 9 insertions(+), 6 deletions(-)
33
1 file changed, 15 insertions(+), 6 deletions(-)
14
34
15
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
35
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
16
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
17
--- a/net/mptcp/bpf.c
37
--- a/net/mptcp/bpf.c
18
+++ b/net/mptcp/bpf.c
38
+++ b/net/mptcp/bpf.c
...
...
23
-             struct mptcp_sock *msk)
43
-             struct mptcp_sock *msk)
24
+             struct sock *sk)
44
+             struct sock *sk)
25
{
45
{
26
    struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
46
    struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
27
-    struct sock *sk = (struct sock *)msk;
47
-    struct sock *sk = (struct sock *)msk;
48
+    struct task_struct *task;
28
+    struct mptcp_sock *msk;
49
+    struct mptcp_sock *msk;
29
50
30
    BUILD_BUG_ON(sizeof(struct bpf_iter_mptcp_subflow_kern) >
51
    BUILD_BUG_ON(sizeof(struct bpf_iter_mptcp_subflow_kern) >
31
         sizeof(struct bpf_iter_mptcp_subflow));
52
         sizeof(struct bpf_iter_mptcp_subflow));
32
    BUILD_BUG_ON(__alignof__(struct bpf_iter_mptcp_subflow_kern) !=
53
    BUILD_BUG_ON(__alignof__(struct bpf_iter_mptcp_subflow_kern) !=
...
...
41
-     !spin_is_locked(&sk->sk_lock.slock))
62
-     !spin_is_locked(&sk->sk_lock.slock))
42
+    if (sk->sk_protocol != IPPROTO_MPTCP)
63
+    if (sk->sk_protocol != IPPROTO_MPTCP)
43
        return -EINVAL;
64
        return -EINVAL;
44
65
45
+    msk = mptcp_sk(sk);
66
+    msk = mptcp_sk(sk);
46
+    kit->msk = msk;
67
+    task = mptcp_get_bpf_iter_task(msk);
68
+    if (!task || task != current)
69
+        return -EINVAL;
70
+
71
+    mptcp_clear_bpf_iter_task(msk);
47
+
72
+
48
+    msk_owned_by_me(msk);
73
+    msk_owned_by_me(msk);
49
+
74
+
75
+    kit->msk = msk;
50
    kit->pos = &msk->conn_list;
76
    kit->pos = &msk->conn_list;
51
    return 0;
77
    return 0;
52
}
78
}
53
--
79
--
54
2.43.0
80
2.43.0
diff view generated by jsdifflib
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
Drop this patch as Martin suggested.
3
Drop this patch as Martin suggested.
4
5
From Martin's review [1], this mptcp_sock_acquire() helper was a workaround
6
only to please the verifier, but it was not needed.
7
8
[1]
9
https://lore.kernel.org/9b373a23-c093-42d8-b4ae-99f2e62e7681@linux.dev
4
10
5
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
11
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
6
---
12
---
7
net/mptcp/bpf.c | 19 -------------------
13
net/mptcp/bpf.c | 19 -------------------
8
1 file changed, 19 deletions(-)
14
1 file changed, 19 deletions(-)
...
...
diff view generated by jsdifflib
1
From: Geliang Tang <tanggeliang@kylinos.cn>
1
From: Geliang Tang <tanggeliang@kylinos.cn>
2
2
3
Drop bpf_mptcp_sock_acquire/release.
3
Use bpf_core_cast() instead of bpf_skc_to_mptcp_sock().
4
Change the 2nd parameter type of bpf_for_each() as 'struct sock'.
5
Drop use of bpf_mptcp_sock_acquire/release.
6
Drop declaration of bpf_mptcp_subflow_tcp_sock. It's no longer used.
7
Update the comment for mptcp_subflow_tcp_sock(), which is a BPF helper,
8
not a kfunc.
9
10
Please update the commit log as:
11
12
'''
13
This patch adds a "cgroup/getsockopt" program "iters_subflow" to test the
14
newly added mptcp_subflow bpf_iter.
15
16
Export mptcp_subflow helpers bpf_iter_mptcp_subflow_new/_next/_destroy
17
and other helpers into bpf_experimental.h.
18
19
Use bpf_for_each() to walk the subflow list of this msk. MPTCP-specific
20
packet scheduler kfunc can be called in the loop. In this test, just
21
add all subflow ids to local variable local_ids, then invoke the helper
22
mptcp_subflow_tcp_sock() in the loop to pick a subsocket.
23
24
Out of the loop, use bpf_mptcp_subflow_ctx() to get the subflow context
25
of the picked subsocket and do some verification. Finally, assign
26
local_ids to global variable ids so that the application can obtain this
27
value.
28
29
Add a subtest named test_iters_subflow to load and verify the newly added
30
mptcp_subflow type bpf_iter example in test_mptcp. Use the helper
31
endpoint_init() to add 3 new subflow endpoints. Send a byte of message
32
to start the mptcp connection, and wait for new subflows to be added.
33
getsockopt() is invoked to trigger the "cgroup/getsockopt" test program
34
"iters_subflow". Check if skel->bss->ids equals 10 to verify whether this
35
mptcp_subflow bpf_iter loops correctly as expected.
36
'''
4
37
5
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
38
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
6
---
39
---
7
tools/testing/selftests/bpf/bpf_experimental.h | 2 +-
40
tools/testing/selftests/bpf/bpf_experimental.h | 2 +-
8
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 ---
41
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 5 -----
9
tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c | 6 +-----
42
tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c | 8 ++------
10
3 files changed, 2 insertions(+), 9 deletions(-)
43
3 files changed, 3 insertions(+), 12 deletions(-)
11
44
12
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
45
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
13
index XXXXXXX..XXXXXXX 100644
46
index XXXXXXX..XXXXXXX 100644
14
--- a/tools/testing/selftests/bpf/bpf_experimental.h
47
--- a/tools/testing/selftests/bpf/bpf_experimental.h
15
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
48
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
...
...
33
-extern struct mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) __ksym;
66
-extern struct mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) __ksym;
34
-extern void bpf_mptcp_sock_release(struct mptcp_sock *msk) __ksym;
67
-extern void bpf_mptcp_sock_release(struct mptcp_sock *msk) __ksym;
35
-
68
-
36
extern struct mptcp_subflow_context *
69
extern struct mptcp_subflow_context *
37
bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym;
70
bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym;
38
extern struct sock *
71
-extern struct sock *
72
-bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym;
73
74
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
75
                    bool scheduled) __ksym;
39
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c
76
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c
40
index XXXXXXX..XXXXXXX 100644
77
index XXXXXXX..XXXXXXX 100644
41
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c
78
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c
42
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c
79
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c
43
@@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx)
80
@@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx)
...
...
51
+    bpf_for_each(mptcp_subflow, subflow, (struct sock *)sk) {
88
+    bpf_for_each(mptcp_subflow, subflow, (struct sock *)sk) {
52
        /* Here MPTCP-specific packet scheduler kfunc can be called:
89
        /* Here MPTCP-specific packet scheduler kfunc can be called:
53
         * this test is not doing anything really useful, only to
90
         * this test is not doing anything really useful, only to
54
         * verify the iteration works.
91
         * verify the iteration works.
55
@@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx)
92
@@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx)
93
94
        local_ids += subflow->subflow_id;
95
96
-        /* only to check the following kfunc works */
97
+        /* only to check the following helper works */
98
        ssk = mptcp_subflow_tcp_sock(subflow);
99
    }
100
101
@@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx)
56
    ids = local_ids;
102
    ids = local_ids;
57
103
58
out:
104
out:
59
-    bpf_mptcp_sock_release(msk);
105
-    bpf_mptcp_sock_release(msk);
60
    return 1;
106
    return 1;
61
}
107
}
62
--
108
--
63
2.43.0
109
2.43.0
diff view generated by jsdifflib