From: Geliang Tang <tanggeliang@kylinos.cn>
It's necessary to traverse all subflows on the conn_list of an MPTCP
socket and then call a kfunc to modify the fields of each subflow. In
kernel space, the mptcp_for_each_subflow() helper is used for this:
	mptcp_for_each_subflow(msk, subflow)
		kfunc(subflow);
But in the MPTCP BPF program, this has not yet been implemented. As
Martin suggested recently, this conn_list walking + modify-by-kfunc
usage fits the bpf_iter use case.
So this patch adds a new bpf_iter type named "mptcp_subflow" and
implements its helpers bpf_iter_mptcp_subflow_new()/_next()/_destroy().
It also registers these bpf_iter mptcp_subflow kfuncs in the MPTCP
common kfunc set. bpf_for_each() can then be used on mptcp_subflow in
a BPF program like this:
	bpf_for_each(mptcp_subflow, subflow, msk)
		kfunc(subflow);
v2: remove msk->pm.lock in _new() and _destroy() (Martin)
drop DEFINE_BPF_ITER_FUNC, change opaque[3] to opaque[2] (Andrii)
v3: drop bpf_iter__mptcp_subflow
v4: if msk is NULL, initialize kit->msk to NULL in _new() and check it in
_next() (Andrii)
v5: use list_is_last() instead of list_entry_is_head()
add KF_ITER_NEW/NEXT/DESTROY flags
add msk_owned_by_me in _new()
v6: add KF_TRUSTED_ARGS flag (Andrii, Martin)
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/bpf.c | 49 +++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 45 insertions(+), 4 deletions(-)
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index c8bc18dff85d..49df9e5d5667 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -201,9 +201,16 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
.set = &bpf_mptcp_fmodret_ids,
};
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "kfuncs which will be used in BPF programs");
+struct bpf_iter_mptcp_subflow { /* iterator state as passed to the bpf_iter_mptcp_subflow_*() kfuncs */
+ __u64 __opaque[2]; /* the kfuncs cast this to struct bpf_iter_mptcp_subflow_kern */
+} __attribute__((aligned(8)));
+
+struct bpf_iter_mptcp_subflow_kern { /* layout the kfuncs use for the iterator state */
+ struct mptcp_sock *msk; /* socket whose conn_list is walked; NULL if _new() was given NULL */
+ struct list_head *pos; /* last node visited; starts at &msk->conn_list */
+} __attribute__((aligned(8)));
+
+__bpf_kfunc_start_defs();
__bpf_kfunc struct mptcp_sock *bpf_mptcp_sk(struct sock *sk)
{
@@ -222,6 +229,37 @@ bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
return mptcp_subflow_tcp_sock(subflow);
}
+__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it,
+ struct mptcp_sock *msk)
+{ /* Set up @it to walk msk->conn_list; returns 0, or -EINVAL if @msk is NULL. */
+ struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+
+ kit->msk = msk; /* stored before the NULL check so _next() can test it */
+ if (!msk)
+ return -EINVAL;
+
+ msk_owned_by_me(msk); /* NOTE(review): presumably checks the msk socket lock is held - confirm */
+
+ kit->pos = &msk->conn_list; /* list head itself; _next() steps to the first entry */
+ return 0;
+}
+
+__bpf_kfunc struct mptcp_subflow_context *
+bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
+{ /* Return the next subflow on msk->conn_list, or NULL when there is none. */
+ struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
+
+ if (!kit->msk || list_is_last(kit->pos, &kit->msk->conn_list)) /* NULL msk from _new(), or no further entry */
+ return NULL;
+
+ kit->pos = kit->pos->next; /* advance first: pos starts at the list head, not at an entry */
+ return list_entry(kit->pos, struct mptcp_subflow_context, node);
+}
+
+__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it)
+{ /* Intentionally empty: the iterator state is just the two pointers set by _new(). */
+}
+
__bpf_kfunc struct mptcp_subflow_context *
bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos)
{
@@ -235,7 +273,7 @@ __bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk)
return tcp_rtx_queue_empty(sk);
}
-__diag_pop();
+__bpf_kfunc_end_defs();
BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids)
BTF_ID_FLAGS(func, bpf_mptcp_sk)
@@ -243,6 +281,9 @@ BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx)
BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock)
BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)
BTF_ID_FLAGS(func, mptcp_subflow_active)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY)
BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids)
static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = {
--
2.43.0