1 | From: Geliang Tang <tanggeliang@kylinos.cn> | 1 | From: Geliang Tang <tanggeliang@kylinos.cn> |
---|---|---|---|
2 | 2 | ||
3 | v5: | 3 | v5: |
4 | - update subjects and commit logs as Matt suggested (thanks). | 4 | - check bpf_iter_task in mptcp_subflow_new() as Mat suggested. |
5 | - avoid repeating checks in patch 1. | ||
6 | - do stricter checks on subflows in patch 3. | ||
7 | 5 | ||
8 | v4: | 6 | v4: |
9 | - CI reports the following BUILD_BUG_ON fails on i386: | 7 | - drop sock_owned_by_user_nocheck and spin_is_locked. According to |
10 | 8 | comments from Mat and Martin, in this set mptcp_subflow | |
11 | BUILD_BUG_ON(sizeof(struct bpf_iter_mptcp_subflow_kern) != | 9 | bpf_iter only used from a cg sockopt bpf prog, no need to add these |
12 | sizeof(struct bpf_iter_mptcp_subflow)) | 10 | check at this moment. |
13 | |||
14 | Just like in bpf_iter_task_new(), change this "!=" to ">". | ||
15 | 11 | ||
16 | v3: | 12 | v3: |
17 | - check sock_owned_by_user_nocheck(sk)/spin_is_locked(&sk->sk_lock.slock), | 13 | - patch 3, continue to use sock_owned_by_user_nocheck() and spin_is_locked() |
18 | instead of lockdep_sock_is_held(sk). | 14 | checks instead of using msk_owned_by_me(). |
19 | - add "sizeof" and "alignof" checks. | 15 | - patch 5, drop declaration of bpf_mptcp_subflow_tcp_sock. It's no longer |
20 | - drop bpf_mptcp_sk() and bpf_mptcp_subflow_tcp_sock() definitions. Use | 16 | used. |
21 | bpf_skc_to_mptcp_sock() and mptcp_subflow_tcp_sock() in mptcp_subflow | 17 | - patch 5, update the comment for mptcp_subflow_tcp_sock(), which is a BPF |
22 | bpf_iter selftests instead. | 18 | helper, not a kfunc. |
19 | |||
20 | The commit log of "bpf: Register mptcp common kfunc set" doesn't match the | ||
21 | code, please update it as: | ||
22 | |||
23 | ''' | ||
24 | bpf: Register mptcp common kfunc set | ||
25 | |||
26 | MPTCP helper mptcp_subflow_ctx() is used to convert struct sock to | ||
27 | struct mptcp_subflow_context. It will be used in MPTCP BPF programs. | ||
28 | |||
29 | This patch defines corresponding wrapper of this helper, and put it | ||
30 | into the newly defined mptcp common kfunc set and register this set | ||
31 | with the flag BPF_PROG_TYPE_CGROUP_SOCKOPT to let it accessible to | ||
32 | the 'cgroup/getsockopt' type of BPF programs. | ||
33 | ''' | ||
23 | 34 | ||
24 | v2: | 35 | v2: |
25 | - add CONFIG_LOCKDEP check in patch 2 to fix the build error reported | 36 | - Drop bpf_skc_to_mptcp_sock |
26 | by CI. | 37 | - Check the owner before assigning the msk as Mat suggested. |
38 | - Use bpf_core_cast() in mptcp_subflow bpf_iter subtest instead of | ||
39 | using bpf_skc_to_mptcp_sock(). | ||
27 | 40 | ||
28 | Address Martin's comments in v1. | 41 | Address Martin's suggestions for "Add mptcp_subflow bpf_iter support" v2. |
29 | 42 | ||
30 | Geliang Tang (5): | 43 | Geliang Tang (5): |
31 | bpf: Extend bpf_skc_to_mptcp_sock to MPTCP sock | 44 | mptcp: add bpf_iter_task for mptcp_sock |
32 | bpf: Allow use of skc_to_mptcp_sock in cg_sockopt | 45 | Squash to "bpf: Extend bpf_skc_to_mptcp_sock to MPTCP sock" |
33 | Squash to "bpf: Register mptcp common kfunc set" | ||
34 | Squash to "bpf: Add mptcp_subflow bpf_iter" | 46 | Squash to "bpf: Add mptcp_subflow bpf_iter" |
47 | Revert "bpf: Acquire and release mptcp socket" | ||
35 | Squash to "selftests/bpf: Add mptcp_subflow bpf_iter subtest" | 48 | Squash to "selftests/bpf: Add mptcp_subflow bpf_iter subtest" |
36 | 49 | ||
37 | include/net/mptcp.h | 4 +- | 50 | net/mptcp/bpf.c | 56 +++++++++---------- |
38 | kernel/bpf/cgroup.c | 2 + | 51 | net/mptcp/protocol.c | 1 + |
39 | net/core/filter.c | 2 +- | 52 | net/mptcp/protocol.h | 16 ++++++ |
40 | net/mptcp/bpf.c | 41 +++++++++++-------- | 53 | .../testing/selftests/bpf/bpf_experimental.h | 2 +- |
41 | tools/testing/selftests/bpf/progs/mptcp_bpf.h | 1 - | 54 | tools/testing/selftests/bpf/progs/mptcp_bpf.h | 5 -- |
42 | .../selftests/bpf/progs/mptcp_bpf_iters.c | 11 +++-- | 55 | .../selftests/bpf/progs/mptcp_bpf_iters.c | 8 +-- |
43 | 6 files changed, 33 insertions(+), 28 deletions(-) | 56 | 6 files changed, 47 insertions(+), 41 deletions(-) |
44 | 57 | ||
45 | -- | 58 | -- |
46 | 2.45.2 | 59 | 2.43.0 | diff view generated by jsdifflib |
1 | From: Geliang Tang <tanggeliang@kylinos.cn> | 1 | From: Geliang Tang <tanggeliang@kylinos.cn> |
---|---|---|---|
2 | 2 | ||
3 | Currently, bpf_skc_to_mptcp_sock() helper is not allowed to be used | 3 | To make sure the mptcp_subflow bpf_iter is running in the |
4 | in cg_sockopt. This patch adds this permission. | 4 | MPTCP context. This patch adds a simplified version of tracking |
5 | for it: | ||
5 | 6 | ||
6 | Thanks to the previous patch allowing skc_to_mptcp_sock() to be used | 7 | 1. Add a 'struct task_struct *bpf_iter_task' field to struct |
7 | with MPTCP sockets, this permission allows this helper to be use it in | 8 | mptcp_sock. |
8 | CGroup BPF hooks, e.g. [gs]etsocktopt. | ||
9 | 9 | ||
10 | 2. Do a WRITE_ONCE(msk->bpf_iter_task, current) before calling | ||
11 | a MPTCP BPF hook, and WRITE_ONCE(msk->bpf_iter_task, NULL) after | ||
12 | the hook returns. | ||
13 | |||
14 | 3. In bpf_iter_mptcp_subflow_new(), check | ||
15 | |||
16 | "READ_ONCE(msk->bpf_scheduler_task) == current" | ||
17 | |||
18 | to confirm the correct task, return -EINVAL if it doesn't match. | ||
19 | |||
20 | Also creates helpers for setting, clearing and checking that value. | ||
21 | |||
22 | Suggested-by: Mat Martineau <martineau@kernel.org> | ||
10 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> | 23 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> |
11 | --- | 24 | --- |
12 | kernel/bpf/cgroup.c | 2 ++ | 25 | net/mptcp/protocol.c | 1 + |
13 | 1 file changed, 2 insertions(+) | 26 | net/mptcp/protocol.h | 16 ++++++++++++++++ |
27 | 2 files changed, 17 insertions(+) | ||
14 | 28 | ||
15 | diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c | 29 | diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c |
16 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/kernel/bpf/cgroup.c | 31 | --- a/net/mptcp/protocol.c |
18 | +++ b/kernel/bpf/cgroup.c | 32 | +++ b/net/mptcp/protocol.c |
19 | @@ -XXX,XX +XXX,XX @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | 33 | @@ -XXX,XX +XXX,XX @@ static void __mptcp_init_sock(struct sock *sk) |
20 | #ifdef CONFIG_INET | 34 | msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; |
21 | case BPF_FUNC_tcp_sock: | 35 | |
22 | return &bpf_tcp_sock_proto; | 36 | WRITE_ONCE(msk->first, NULL); |
23 | + case BPF_FUNC_skc_to_mptcp_sock: | 37 | + WRITE_ONCE(msk->bpf_iter_task, NULL); |
24 | + return &bpf_skc_to_mptcp_sock_proto; | 38 | inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; |
39 | WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); | ||
40 | WRITE_ONCE(msk->allow_infinite_fallback, true); | ||
41 | diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/net/mptcp/protocol.h | ||
44 | +++ b/net/mptcp/protocol.h | ||
45 | @@ -XXX,XX +XXX,XX @@ struct mptcp_sock { | ||
46 | struct list_head conn_list; | ||
47 | struct list_head rtx_queue; | ||
48 | struct mptcp_data_frag *first_pending; | ||
49 | + struct task_struct *bpf_iter_task; | ||
50 | struct list_head join_list; | ||
51 | struct sock *first; /* The mptcp ops can safely dereference, using suitable | ||
52 | * ONCE annotation, the subflow outside the socket | ||
53 | @@ -XXX,XX +XXX,XX @@ mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_re | ||
54 | static inline void mptcp_join_cookie_init(void) {} | ||
25 | #endif | 55 | #endif |
26 | case BPF_FUNC_perf_event_output: | 56 | |
27 | return &bpf_event_output_data_proto; | 57 | +static inline void mptcp_set_bpf_iter_task(struct mptcp_sock *msk) |
58 | +{ | ||
59 | + WRITE_ONCE(msk->bpf_iter_task, current); | ||
60 | +} | ||
61 | + | ||
62 | +static inline void mptcp_clear_bpf_iter_task(struct mptcp_sock *msk) | ||
63 | +{ | ||
64 | + WRITE_ONCE(msk->bpf_iter_task, NULL); | ||
65 | +} | ||
66 | + | ||
67 | +static inline struct task_struct *mptcp_get_bpf_iter_task(struct mptcp_sock *msk) | ||
68 | +{ | ||
69 | + return READ_ONCE(msk->bpf_iter_task); | ||
70 | +} | ||
71 | + | ||
72 | #endif /* __MPTCP_PROTOCOL_H */ | ||
28 | -- | 73 | -- |
29 | 2.45.2 | 74 | 2.43.0 | diff view generated by jsdifflib |
1 | From: Geliang Tang <tanggeliang@kylinos.cn> | 1 | From: Geliang Tang <tanggeliang@kylinos.cn> |
---|---|---|---|
2 | 2 | ||
3 | Currently, bpf_skc_to_mptcp_sock() can only be used with sockets that | 3 | Set msk->bpf_iter_task in bpf_mptcp_sock_from_sock() to allow |
4 | are MPTCP subflows: TCP sockets with tp->is_mptcp, created by the kernel | 4 | mptcp_subflow bpt_iter can be used in cgroup/getsockopt, |
5 | from an MPTCP socket (IPPROTO_MPTCP). Typically used with BPF sock_ops | 5 | otherwise, the selftest in this set fails. |
6 | operators. | ||
7 | |||
8 | Here, this helper is extended to support MPTCP sockets, the ones created | ||
9 | by the userspace (IPPROTO_MPTCP). This is useful for BPF hooks involving | ||
10 | these sockets, e.g. [gs]etsocktopt. | ||
11 | |||
12 | bpf_skc_to_mptcp_sock() uses bpf_mptcp_sock_from_subflow(). The former | ||
13 | suggests any MPTCP type/subtype can be used, but the latter only accepts | ||
14 | subflow ones. So bpf_mptcp_sock_from_subflow is modified here to support | ||
15 | MPTCP socket, and renamed to avoid confusions. | ||
16 | 6 | ||
17 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> | 7 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> |
18 | --- | 8 | --- |
19 | include/net/mptcp.h | 4 ++-- | 9 | net/mptcp/bpf.c | 16 ++++++++++++---- |
20 | net/core/filter.c | 2 +- | 10 | 1 file changed, 12 insertions(+), 4 deletions(-) |
21 | net/mptcp/bpf.c | 10 ++++++++-- | ||
22 | 3 files changed, 11 insertions(+), 5 deletions(-) | ||
23 | 11 | ||
24 | diff --git a/include/net/mptcp.h b/include/net/mptcp.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/include/net/mptcp.h | ||
27 | +++ b/include/net/mptcp.h | ||
28 | @@ -XXX,XX +XXX,XX @@ static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } | ||
29 | #endif | ||
30 | |||
31 | #if defined(CONFIG_MPTCP) && defined(CONFIG_BPF_SYSCALL) | ||
32 | -struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk); | ||
33 | +struct mptcp_sock *bpf_mptcp_sock_from_sock(struct sock *sk); | ||
34 | #else | ||
35 | -static inline struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { return NULL; } | ||
36 | +static inline struct mptcp_sock *bpf_mptcp_sock_from_sock(struct sock *sk) { return NULL; } | ||
37 | #endif | ||
38 | |||
39 | #if !IS_ENABLED(CONFIG_MPTCP) | ||
40 | diff --git a/net/core/filter.c b/net/core/filter.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/net/core/filter.c | ||
43 | +++ b/net/core/filter.c | ||
44 | @@ -XXX,XX +XXX,XX @@ const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { | ||
45 | BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk) | ||
46 | { | ||
47 | BTF_TYPE_EMIT(struct mptcp_sock); | ||
48 | - return (unsigned long)bpf_mptcp_sock_from_subflow(sk); | ||
49 | + return (unsigned long)bpf_mptcp_sock_from_sock(sk); | ||
50 | } | ||
51 | |||
52 | const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = { | ||
53 | diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c | 12 | diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c |
54 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
55 | --- a/net/mptcp/bpf.c | 14 | --- a/net/mptcp/bpf.c |
56 | +++ b/net/mptcp/bpf.c | 15 | +++ b/net/mptcp/bpf.c |
57 | @@ -XXX,XX +XXX,XX @@ static struct bpf_struct_ops bpf_mptcp_sched_ops = { | 16 | @@ -XXX,XX +XXX,XX @@ static struct bpf_struct_ops bpf_mptcp_sched_ops = { |
58 | }; | 17 | |
59 | #endif /* CONFIG_BPF_JIT */ | 18 | struct mptcp_sock *bpf_mptcp_sock_from_sock(struct sock *sk) |
60 | |||
61 | -struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) | ||
62 | +struct mptcp_sock *bpf_mptcp_sock_from_sock(struct sock *sk) | ||
63 | { | 19 | { |
64 | - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) | 20 | + struct mptcp_sock *msk; |
65 | + if (unlikely(!sk || !sk_fullsock(sk))) | ||
66 | + return NULL; | ||
67 | + | 21 | + |
68 | + if (sk->sk_protocol == IPPROTO_MPTCP) | 22 | if (unlikely(!sk || !sk_fullsock(sk))) |
69 | + return mptcp_sk(sk); | 23 | return NULL; |
70 | + | 24 | |
71 | + if (sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) | 25 | - if (sk->sk_protocol == IPPROTO_MPTCP) |
72 | return mptcp_sk(mptcp_subflow_ctx(sk)->conn); | 26 | - return mptcp_sk(sk); |
27 | + if (sk->sk_protocol == IPPROTO_MPTCP) { | ||
28 | + msk = mptcp_sk(sk); | ||
29 | + mptcp_set_bpf_iter_task(msk); | ||
30 | + return msk; | ||
31 | + } | ||
32 | |||
33 | - if (sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) | ||
34 | - return mptcp_sk(mptcp_subflow_ctx(sk)->conn); | ||
35 | + if (sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) { | ||
36 | + msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn); | ||
37 | + mptcp_set_bpf_iter_task(msk); | ||
38 | + return msk; | ||
39 | + } | ||
73 | 40 | ||
74 | return NULL; | 41 | return NULL; |
42 | } | ||
75 | -- | 43 | -- |
76 | 2.45.2 | 44 | 2.43.0 | diff view generated by jsdifflib |
1 | From: Geliang Tang <tanggeliang@kylinos.cn> | 1 | From: Geliang Tang <tanggeliang@kylinos.cn> |
---|---|---|---|
2 | 2 | ||
3 | Add "sizeof" and "alignof" checks. | 3 | Drop the NULL check for 'msk' as Martin suggested, add more checks |
4 | for 'sk'. | ||
4 | 5 | ||
5 | Address Martin's comments in v1: | 6 | Use the "struct sock *sk" instead of "struct mptcp-sock *msk" as the |
7 | argument in the bpf_iter_mptcp_subflow_new as Martin suggested. | ||
6 | 8 | ||
7 | - bpf_iter_mptcp_subflow_new returns -EINVAL when msk socket lock isn't | 9 | v5: |
8 | held. | 10 | - check bpf_iter_task in mptcp_subflow_new() |
11 | |||
12 | v4: | ||
13 | - drop sock_owned_by_user_nocheck and spin_is_locked. According to | ||
14 | comments from Mat [2] and Martin [1], in this set mptcp_subflow | ||
15 | bpf_iter only used from a cg sockopt bpf prog, no need to add these | ||
16 | check at this moment. | ||
17 | |||
18 | [1] | ||
19 | https://lore.kernel.org/all/fdf0ddbe-e007-4a5f-bbdf-9a144e8fbe35@linux.dev/ | ||
20 | [2] | ||
21 | https://patchwork.kernel.org/project/mptcp/patch/f6469225598beecbf0bda12a4c33fafa86c0ff15.1739787744.git.tanggeliang@kylinos.cn/ | ||
22 | |||
23 | v3: | ||
24 | - continue to use sock_owned_by_user_nocheck and spin_is_locked | ||
25 | checks instead of using msk_owned_by_me(). | ||
26 | |||
27 | v2: | ||
28 | - check the owner before assigning the msk as Mat suggested. | ||
9 | 29 | ||
10 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> | 30 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> |
11 | --- | 31 | --- |
12 | net/mptcp/bpf.c | 10 +++++++++- | 32 | net/mptcp/bpf.c | 21 +++++++++++++++------ |
13 | 1 file changed, 9 insertions(+), 1 deletion(-) | 33 | 1 file changed, 15 insertions(+), 6 deletions(-) |
14 | 34 | ||
15 | diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c | 35 | diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c |
16 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/net/mptcp/bpf.c | 37 | --- a/net/mptcp/bpf.c |
18 | +++ b/net/mptcp/bpf.c | 38 | +++ b/net/mptcp/bpf.c |
19 | @@ -XXX,XX +XXX,XX @@ bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, | 39 | @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_ctx(const struct sock *sk) |
20 | struct mptcp_sock *msk) | 40 | |
41 | __bpf_kfunc static int | ||
42 | bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, | ||
43 | - struct mptcp_sock *msk) | ||
44 | + struct sock *sk) | ||
21 | { | 45 | { |
22 | struct bpf_iter_mptcp_subflow_kern *kit = (void *)it; | 46 | struct bpf_iter_mptcp_subflow_kern *kit = (void *)it; |
23 | + struct sock *sk = (struct sock *)msk; | 47 | - struct sock *sk = (struct sock *)msk; |
48 | + struct task_struct *task; | ||
49 | + struct mptcp_sock *msk; | ||
50 | |||
51 | BUILD_BUG_ON(sizeof(struct bpf_iter_mptcp_subflow_kern) > | ||
52 | sizeof(struct bpf_iter_mptcp_subflow)); | ||
53 | BUILD_BUG_ON(__alignof__(struct bpf_iter_mptcp_subflow_kern) != | ||
54 | __alignof__(struct bpf_iter_mptcp_subflow)); | ||
55 | |||
56 | - kit->msk = msk; | ||
57 | - if (!msk) | ||
58 | + if (unlikely(!sk || !sk_fullsock(sk))) | ||
59 | return -EINVAL; | ||
60 | |||
61 | - if (!sock_owned_by_user_nocheck(sk) && | ||
62 | - !spin_is_locked(&sk->sk_lock.slock)) | ||
63 | + if (sk->sk_protocol != IPPROTO_MPTCP) | ||
64 | return -EINVAL; | ||
65 | |||
66 | + msk = mptcp_sk(sk); | ||
67 | + task = mptcp_get_bpf_iter_task(msk); | ||
68 | + if (!task || task != current) | ||
69 | + return -EINVAL; | ||
24 | + | 70 | + |
25 | + BUILD_BUG_ON(sizeof(struct bpf_iter_mptcp_subflow_kern) > | 71 | + mptcp_clear_bpf_iter_task(msk); |
26 | + sizeof(struct bpf_iter_mptcp_subflow)); | 72 | + |
27 | + BUILD_BUG_ON(__alignof__(struct bpf_iter_mptcp_subflow_kern) != | 73 | + msk_owned_by_me(msk); |
28 | + __alignof__(struct bpf_iter_mptcp_subflow)); | 74 | + |
29 | 75 | + kit->msk = msk; | |
30 | kit->msk = msk; | ||
31 | if (!msk) | ||
32 | return -EINVAL; | ||
33 | |||
34 | - msk_owned_by_me(msk); | ||
35 | + if (!sock_owned_by_user_nocheck(sk) && | ||
36 | + !spin_is_locked(&sk->sk_lock.slock)) | ||
37 | + return -EINVAL; | ||
38 | |||
39 | kit->pos = &msk->conn_list; | 76 | kit->pos = &msk->conn_list; |
40 | return 0; | 77 | return 0; |
78 | } | ||
41 | -- | 79 | -- |
42 | 2.45.2 | 80 | 2.43.0 | diff view generated by jsdifflib |
1 | From: Geliang Tang <tanggeliang@kylinos.cn> | 1 | From: Geliang Tang <tanggeliang@kylinos.cn> |
---|---|---|---|
2 | 2 | ||
3 | Drop bpf_mptcp_sk() and bpf_mptcp_subflow_tcp_sock() definitions. Use | 3 | Drop this patch as Martin suggested. |
4 | bpf_skc_to_mptcp_sock() and mptcp_subflow_tcp_sock() in mptcp_subflow | ||
5 | bpf_iter selftests instead. | ||
6 | 4 | ||
7 | Address Martin's comments in v1: | 5 | From Martin's review [1], this mptcp_sock_acquire() helper was a workaround |
6 | only to please the verifier, but they were not needed. | ||
8 | 7 | ||
9 | - add null-check for bpf_mptcp_subflow_ctx. | 8 | [1] |
10 | - add KF_RET_NULL flags for bpf_mptcp_subflow_ctx. | 9 | https://lore.kernel.org/9b373a23-c093-42d8-b4ae-99f2e62e7681@linux.dev |
11 | - register this kfunc set to BPF_PROG_TYPE_CGROUP_SOCKOPT only, | ||
12 | not BPF_PROG_TYPE_UNSPEC. | ||
13 | 10 | ||
14 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> | 11 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> |
15 | --- | 12 | --- |
16 | net/mptcp/bpf.c | 21 ++++++--------------- | 13 | net/mptcp/bpf.c | 19 ------------------- |
17 | 1 file changed, 6 insertions(+), 15 deletions(-) | 14 | 1 file changed, 19 deletions(-) |
18 | 15 | ||
19 | diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c | 16 | diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c |
20 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/net/mptcp/bpf.c | 18 | --- a/net/mptcp/bpf.c |
22 | +++ b/net/mptcp/bpf.c | 19 | +++ b/net/mptcp/bpf.c |
23 | @@ -XXX,XX +XXX,XX @@ struct bpf_iter_mptcp_subflow_kern { | 20 | @@ -XXX,XX +XXX,XX @@ bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) |
24 | 21 | { | |
25 | __bpf_kfunc_start_defs(); | 22 | } |
26 | 23 | ||
27 | -__bpf_kfunc static struct mptcp_sock *bpf_mptcp_sk(struct sock *sk) | 24 | -__bpf_kfunc static struct |
25 | -mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) | ||
28 | -{ | 26 | -{ |
29 | - return mptcp_sk(sk); | 27 | - struct sock *sk = (struct sock *)msk; |
28 | - | ||
29 | - if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) | ||
30 | - return msk; | ||
31 | - return NULL; | ||
30 | -} | 32 | -} |
31 | - | 33 | - |
32 | __bpf_kfunc static struct mptcp_subflow_context * | 34 | -__bpf_kfunc static void bpf_mptcp_sock_release(struct mptcp_sock *msk) |
33 | bpf_mptcp_subflow_ctx(const struct sock *sk) | 35 | -{ |
36 | - struct sock *sk = (struct sock *)msk; | ||
37 | - | ||
38 | - WARN_ON_ONCE(!sk || !refcount_dec_not_one(&sk->sk_refcnt)); | ||
39 | -} | ||
40 | - | ||
41 | __bpf_kfunc struct mptcp_subflow_context * | ||
42 | bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) | ||
34 | { | 43 | { |
35 | - return mptcp_subflow_ctx(sk); | 44 | @@ -XXX,XX +XXX,XX @@ BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx, KF_RET_NULL) |
36 | -} | ||
37 | + if (sk && sk_fullsock(sk) && | ||
38 | + sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) | ||
39 | + return mptcp_subflow_ctx(sk); | ||
40 | |||
41 | -__bpf_kfunc static struct sock * | ||
42 | -bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) | ||
43 | -{ | ||
44 | - return mptcp_subflow_tcp_sock(subflow); | ||
45 | + return NULL; | ||
46 | } | ||
47 | |||
48 | __bpf_kfunc static int | ||
49 | @@ -XXX,XX +XXX,XX @@ __bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk) | ||
50 | __bpf_kfunc_end_defs(); | ||
51 | |||
52 | BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) | ||
53 | -BTF_ID_FLAGS(func, bpf_mptcp_sk) | ||
54 | -BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx) | ||
55 | -BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock) | ||
56 | +BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx, KF_RET_NULL) | ||
57 | BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) | 45 | BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) |
58 | BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) | 46 | BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) |
59 | BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) | 47 | BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) |
60 | @@ -XXX,XX +XXX,XX @@ static int __init bpf_mptcp_kfunc_init(void) | 48 | -BTF_ID_FLAGS(func, bpf_mptcp_sock_acquire, KF_ACQUIRE | KF_RET_NULL) |
61 | int ret; | 49 | -BTF_ID_FLAGS(func, bpf_mptcp_sock_release, KF_RELEASE) |
62 | 50 | BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) | |
63 | ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set); | 51 | |
64 | - ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, | 52 | static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { |
65 | + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCKOPT, | ||
66 | &bpf_mptcp_common_kfunc_set); | ||
67 | ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, | ||
68 | &bpf_mptcp_sched_kfunc_set); | ||
69 | -- | 53 | -- |
70 | 2.45.2 | 54 | 2.43.0 | diff view generated by jsdifflib |
1 | From: Geliang Tang <tanggeliang@kylinos.cn> | 1 | From: Geliang Tang <tanggeliang@kylinos.cn> |
---|---|---|---|
2 | 2 | ||
3 | Use bpf_skc_to_mptcp_sock() and mptcp_subflow_tcp_sock() instead of | 3 | Use bpf_core_cast() instead of bpf_skc_to_mptcp_sock(). |
4 | bpf_mptcp_sk() and bpf_mptcp_subflow_tcp_sock(). | 4 | Change the 2nd parameter type of bpf_for_each() as 'struct sock'. |
5 | Drop use of bpf_mptcp_sock_acquire/release. | ||
6 | Drop declaration of bpf_mptcp_subflow_tcp_sock. It's no longer used. | ||
7 | Update the comment for mptcp_subflow_tcp_sock(), which is a BPF helper, | ||
8 | not a kfunc. | ||
5 | 9 | ||
6 | IPPROTO_MPTCP is checked in bpf_skc_to_mptcp_sock(), no need to check | 10 | Please update the commit log as: |
7 | it in BPF program. | ||
8 | 11 | ||
9 | bpf_skc_to_mptcp_sock() and bpf_mptcp_subflow_ctx() may return NULL, | 12 | ''' |
10 | need to check the return values. | 13 | This patch adds a "cgroup/getsockopt" program "iters_subflow" to test the |
14 | newly added mptcp_subflow bpf_iter. | ||
15 | |||
16 | Export mptcp_subflow helpers bpf_iter_mptcp_subflow_new/_next/_destroy | ||
17 | and other helpers into bpf_experimental.h. | ||
18 | |||
19 | Use bpf_for_each() to walk the subflow list of this msk. MPTCP-specific | ||
20 | packet scheduler kfunc can be called in the loop. In this test, just | ||
21 | add all subflow ids to local variable local_ids, then invoke the helper | ||
22 | mptcp_subflow_tcp_sock() in the loop to pick a subsocket. | ||
23 | |||
24 | Out of the loop, use bpf_mptcp_subflow_ctx() to get the subflow context | ||
25 | of the picked subsocket and do some verification. Finally, assign | ||
26 | local_ids to global variable ids so that the application can obtain this | ||
27 | value. | ||
28 | |||
29 | Add a subtest named test_iters_subflow to load and verify the newly added | ||
30 | mptcp_subflow type bpf_iter example in test_mptcp. Use the helper | ||
31 | endpoint_init() to add 3 new subflow endpoints. Send a byte of message | ||
32 | to start the mptcp connection, and wait for new subflows to be added. | ||
33 | getsockopt() is invoked to trigger the "cgroup/getsockopt" test program | ||
34 | "iters_subflow". Check if skel->bss->ids equals 10 to verify whether this | ||
35 | mptcp_subflow bpf_iter loops correctly as expected. | ||
36 | ''' | ||
11 | 37 | ||
12 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> | 38 | Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> |
13 | --- | 39 | --- |
14 | tools/testing/selftests/bpf/progs/mptcp_bpf.h | 1 - | 40 | tools/testing/selftests/bpf/bpf_experimental.h | 2 +- |
15 | tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c | 11 +++++------ | 41 | tools/testing/selftests/bpf/progs/mptcp_bpf.h | 5 ----- |
16 | 2 files changed, 5 insertions(+), 7 deletions(-) | 42 | tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c | 8 ++------ |
43 | 3 files changed, 3 insertions(+), 12 deletions(-) | ||
17 | 44 | ||
45 | diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/tools/testing/selftests/bpf/bpf_experimental.h | ||
48 | +++ b/tools/testing/selftests/bpf/bpf_experimental.h | ||
49 | @@ -XXX,XX +XXX,XX @@ extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym; | ||
50 | |||
51 | struct bpf_iter_mptcp_subflow; | ||
52 | extern int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, | ||
53 | - struct mptcp_sock *msk) __weak __ksym; | ||
54 | + struct sock *sk) __weak __ksym; | ||
55 | extern struct mptcp_subflow_context * | ||
56 | bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it) __weak __ksym; | ||
57 | extern void | ||
18 | diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h | 58 | diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h |
19 | index XXXXXXX..XXXXXXX 100644 | 59 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h | 60 | --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h |
21 | +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h | 61 | +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h |
22 | @@ -XXX,XX +XXX,XX @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) | 62 | @@ -XXX,XX +XXX,XX @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) |
23 | extern struct mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) __ksym; | 63 | } |
24 | extern void bpf_mptcp_sock_release(struct mptcp_sock *msk) __ksym; | 64 | |
25 | 65 | /* ksym */ | |
26 | -extern struct mptcp_sock *bpf_mptcp_sk(struct sock *sk) __ksym; | 66 | -extern struct mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) __ksym; |
67 | -extern void bpf_mptcp_sock_release(struct mptcp_sock *msk) __ksym; | ||
68 | - | ||
27 | extern struct mptcp_subflow_context * | 69 | extern struct mptcp_subflow_context * |
28 | bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym; | 70 | bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym; |
29 | extern struct sock * | 71 | -extern struct sock * |
72 | -bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym; | ||
73 | |||
74 | extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, | ||
75 | bool scheduled) __ksym; | ||
30 | diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c | 76 | diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c |
31 | index XXXXXXX..XXXXXXX 100644 | 77 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c | 78 | --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c |
33 | +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c | 79 | +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_iters.c |
34 | @@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx) | 80 | @@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx) |
35 | struct mptcp_sock *msk; | 81 | if (!msk || msk->pm.server_side || !msk->pm.subflows) |
36 | int local_ids = 0; | ||
37 | |||
38 | - if (!sk || sk->protocol != IPPROTO_MPTCP || | ||
39 | - ctx->level != SOL_TCP || ctx->optname != TCP_IS_MPTCP) | ||
40 | + if (ctx->level != SOL_TCP || ctx->optname != TCP_IS_MPTCP) | ||
41 | return 1; | 82 | return 1; |
42 | 83 | ||
43 | - msk = bpf_mptcp_sk((struct sock *)sk); | 84 | - msk = bpf_mptcp_sock_acquire(msk); |
44 | - if (msk->pm.server_side || !msk->pm.subflows) | 85 | - if (!msk) |
45 | + msk = bpf_skc_to_mptcp_sock(sk); | 86 | - return 1; |
46 | + if (!msk || msk->pm.server_side || !msk->pm.subflows) | 87 | - bpf_for_each(mptcp_subflow, subflow, msk) { |
47 | return 1; | 88 | + bpf_for_each(mptcp_subflow, subflow, (struct sock *)sk) { |
48 | 89 | /* Here MPTCP-specific packet scheduler kfunc can be called: | |
49 | msk = bpf_mptcp_sock_acquire(msk); | 90 | * this test is not doing anything really useful, only to |
91 | * verify the iteration works. | ||
50 | @@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx) | 92 | @@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx) |
93 | |||
51 | local_ids += subflow->subflow_id; | 94 | local_ids += subflow->subflow_id; |
52 | 95 | ||
53 | /* only to check the following kfunc works */ | 96 | - /* only to check the following kfunc works */ |
54 | - ssk = bpf_mptcp_subflow_tcp_sock(subflow); | 97 | + /* only to check the following helper works */ |
55 | + ssk = mptcp_subflow_tcp_sock(subflow); | 98 | ssk = mptcp_subflow_tcp_sock(subflow); |
56 | } | 99 | } |
57 | 100 | ||
58 | if (!ssk) | ||
59 | @@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx) | 101 | @@ -XXX,XX +XXX,XX @@ int iters_subflow(struct bpf_sockopt *ctx) |
60 | |||
61 | /* only to check the following kfunc works */ | ||
62 | subflow = bpf_mptcp_subflow_ctx(ssk); | ||
63 | - if (subflow->token != msk->token) | ||
64 | + if (!subflow || subflow->token != msk->token) | ||
65 | goto out; | ||
66 | |||
67 | ids = local_ids; | 102 | ids = local_ids; |
103 | |||
104 | out: | ||
105 | - bpf_mptcp_sock_release(msk); | ||
106 | return 1; | ||
107 | } | ||
68 | -- | 108 | -- |
69 | 2.45.2 | 109 | 2.43.0 | diff view generated by jsdifflib |