:p
atchew
Login
From: Geliang Tang <tanggeliang@kylinos.cn> v5: - patch 2, drop mptcp_sock_type and mptcp_subflow_type. - patch 3, revert "bpf: Export more bpf_burst related functions" - patch 4, merge "bpf: Export more bpf_burst related functions" into it. v4: - patch 2, a new cleanup for "bpf: Add bpf_mptcp_sched_ops". - patch 3 should be reverted. - patch 8, register kfunc_set. v3: - rebased. - put the "drop has_bytes_sent" squash-to patch into this set. v2: - update bpf_rr and bpf_burst With the newly added mptcp_subflow bpf_iter, we can get rid of the subflow array "contexts" in struct mptcp_sched_data. This set uses the bpf_for_each(mptcp_subflow) helper to update all the BPF schedulers: bpf_for_each(mptcp_subflow, subflow, msk) { ... ... mptcp_subflow_set_scheduled(subflow, true); } Depends on: - "add mptcp_subflow bpf_iter" v9 Based-on: <cover.1728466623.git.tanggeliang@kylinos.cn> Geliang Tang (11): Revert "mptcp: add sched_data helpers" Squash to "bpf: Add bpf_mptcp_sched_ops" Revert "bpf: Export more bpf_burst related functions" Squash to "bpf: Add bpf_mptcp_sched_kfunc_set" Squash to "selftests/bpf: Add bpf_first scheduler & test" Squash to "selftests/bpf: Add bpf_bkup scheduler & test" Squash to "selftests/bpf: Add bpf_rr scheduler & test" Squash to "selftests/bpf: Add bpf_red scheduler & test" Squash to "selftests/bpf: Add bpf_burst scheduler & test" mptcp: drop subflow contexts in mptcp_sched_data Squash to "selftests/bpf: Add bpf scheduler test" - drop has_bytes_sent include/net/mptcp.h | 2 - net/mptcp/bpf.c | 24 ++---- net/mptcp/protocol.h | 3 - net/mptcp/sched.c | 22 ------ .../testing/selftests/bpf/prog_tests/mptcp.c | 48 ++++++------ tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 - .../selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +--- .../selftests/bpf/progs/mptcp_bpf_burst.c | 78 +++++++++---------- .../selftests/bpf/progs/mptcp_bpf_bytes.c | 39 ++++++++++ .../selftests/bpf/progs/mptcp_bpf_first.c | 8 +- .../selftests/bpf/progs/mptcp_bpf_red.c | 8 +- 
.../selftests/bpf/progs/mptcp_bpf_rr.c | 24 +++--- 12 files changed, 133 insertions(+), 142 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Drop this patch. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 8 -------- net/mptcp/protocol.h | 2 -- net/mptcp/sched.c | 22 ---------------------- 3 files changed, 32 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ __bpf_kfunc void bpf_mptcp_sock_release(struct mptcp_sock *msk) WARN_ON_ONCE(!sk || !refcount_dec_not_one(&sk->sk_refcnt)); } -__bpf_kfunc struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) -{ - if (pos >= MPTCP_SUBFLOWS_MAX) - return NULL; - return data->contexts[pos]; -} - __bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk) { return tcp_rtx_queue_empty(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent); u64 mptcp_wnd_end(const struct mptcp_sock *msk); void mptcp_set_timeout(struct sock *sk); bool bpf_mptcp_subflow_queues_empty(struct sock *sk); -struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, WRITE_ONCE(subflow->scheduled, scheduled); } -static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, - struct mptcp_sched_data *data) -{ - struct mptcp_subflow_context *subflow; - int i = 0; - - mptcp_for_each_subflow(msk, subflow) { - if (i == MPTCP_SUBFLOWS_MAX) { - pr_warn_once("too 
many subflows"); - break; - } - mptcp_subflow_set_scheduled(subflow, false); - data->contexts[i++] = subflow; - } - data->subflows = i; - - for (; i < MPTCP_SUBFLOWS_MAX; i++) - data->contexts[i] = NULL; -} - int mptcp_sched_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_send(struct mptcp_sock *msk) data.reinject = false; if (msk->sched == &mptcp_sched_default || !msk->sched) return mptcp_sched_default_get_subflow(msk, &data); - mptcp_sched_data_set_contexts(msk, &data); return msk->sched->get_subflow(msk, &data); } @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) data.reinject = true; if (msk->sched == &mptcp_sched_default || !msk->sched) return mptcp_sched_default_get_subflow(msk, &data); - mptcp_sched_data_set_contexts(msk, &data); return msk->sched->get_subflow(msk, &data); } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Please update the subject to "bpf: Add mptcp packet scheduler struct_ops". Drop mptcp_sock_type and mptcp_subflow_type. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ #ifdef CONFIG_BPF_JIT static struct bpf_struct_ops bpf_mptcp_sched_ops; -static const struct btf_type *mptcp_sock_type, *mptcp_subflow_type __read_mostly; static u32 mptcp_sock_id, mptcp_subflow_id; +/* MPTCP BPF packet scheduler */ + static const struct bpf_func_proto * bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size) { - const struct btf_type *t; + u32 id = reg->btf_id; size_t end; - t = btf_type_by_id(reg->btf, reg->btf_id); - - if (t == mptcp_sock_type) { + if (id == mptcp_sock_id) { switch (off) { case offsetof(struct mptcp_sock, snd_burst): end = offsetofend(struct mptcp_sock, snd_burst); @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, off); return -EACCES; } - } else if (t == mptcp_subflow_type) { + } else if (id == mptcp_subflow_id) { switch (off) { case offsetof(struct mptcp_subflow_context, avg_pacing_rate): end = offsetofend(struct mptcp_subflow_context, avg_pacing_rate); @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, if (off + size > end) { bpf_log(log, "access beyond %s at off %u size %u ended at %zu", - t == mptcp_sock_type ? "mptcp_sock" : "mptcp_subflow_context", + id == mptcp_sock_id ? 
"mptcp_sock" : "mptcp_subflow_context", off, size, end); return -EACCES; } @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_init(struct btf *btf) if (type_id < 0) return -EINVAL; mptcp_sock_id = type_id; - mptcp_sock_type = btf_type_by_id(btf, mptcp_sock_id); type_id = btf_find_by_name_kind(btf, "mptcp_subflow_context", BTF_KIND_STRUCT); if (type_id < 0) return -EINVAL; mptcp_subflow_id = type_id; - mptcp_subflow_type = btf_type_by_id(btf, mptcp_subflow_id); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> This reverts commit d0f5ca7a98fea075d60fb88f1c2b29a6f5f04d68. --- net/mptcp/bpf.c | 10 ---------- net/mptcp/protocol.c | 4 ++-- net/mptcp/protocol.h | 3 --- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ __bpf_kfunc void bpf_mptcp_sock_release(struct mptcp_sock *msk) WARN_ON_ONCE(!sk || !refcount_dec_not_one(&sk->sk_refcnt)); } -__bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk) -{ - return tcp_rtx_queue_empty(sk); -} - __bpf_kfunc_end_defs(); BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) @@ -XXX,XX +XXX,XX @@ static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids) BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos) -BTF_ID_FLAGS(func, mptcp_set_timeout) -BTF_ID_FLAGS(func, mptcp_wnd_end) -BTF_ID_FLAGS(func, tcp_stream_memory_free) -BTF_ID_FLAGS(func, bpf_mptcp_subflow_queues_empty) -BTF_ID_FLAGS(func, mptcp_pm_subflow_chk_stale) BTF_KFUNCS_END(bpf_mptcp_sched_kfunc_ids) static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; /* Returns end sequence number of the receiver's advertised window */ -u64 mptcp_wnd_end(const struct mptcp_sock *msk) +static u64 mptcp_wnd_end(const struct mptcp_sock *msk) { return READ_ONCE(msk->wnd_end); } @@ -XXX,XX +XXX,XX @@ static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subfl inet_csk(ssk)->icsk_timeout - jiffies : 0; } -void mptcp_set_timeout(struct sock *sk) +static void mptcp_set_timeout(struct sock *sk) { struct mptcp_subflow_context *subflow; long tout = 0; diff --git 
a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); -u64 mptcp_wnd_end(const struct mptcp_sock *msk); -void mptcp_set_timeout(struct sock *sk); -bool bpf_mptcp_subflow_queues_empty(struct sock *sk); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Please update the subject to "bpf: Export mptcp packet scheduler helpers" Remove bpf_mptcp_subflow_ctx_by_pos from BPF kfunc set. Merge "bpf: Export more bpf_burst related functions" into this patch. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 11 ++++++++++- net/mptcp/protocol.c | 4 ++-- net/mptcp/protocol.h | 2 ++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ __bpf_kfunc void bpf_mptcp_sock_release(struct mptcp_sock *msk) WARN_ON_ONCE(!sk || !refcount_dec_not_one(&sk->sk_refcnt)); } +__bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk) +{ + return tcp_rtx_queue_empty(sk); +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) @@ -XXX,XX +XXX,XX @@ static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { }; BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids) -BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos) +BTF_ID_FLAGS(func, mptcp_set_timeout) +BTF_ID_FLAGS(func, mptcp_wnd_end) +BTF_ID_FLAGS(func, tcp_stream_memory_free) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_queues_empty) +BTF_ID_FLAGS(func, mptcp_pm_subflow_chk_stale) BTF_KFUNCS_END(bpf_mptcp_sched_kfunc_ids) static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; /* Returns end sequence number of the receiver's advertised window */ -static u64 mptcp_wnd_end(const struct mptcp_sock *msk) +u64 mptcp_wnd_end(const struct mptcp_sock *msk) { return READ_ONCE(msk->wnd_end); } @@ -XXX,XX +XXX,XX @@ static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subfl 
inet_csk(ssk)->icsk_timeout - jiffies : 0; } -static void mptcp_set_timeout(struct sock *sk) +void mptcp_set_timeout(struct sock *sk) { struct mptcp_subflow_context *subflow; long tout = 0; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); +u64 mptcp_wnd_end(const struct mptcp_sock *msk); +void mptcp_set_timeout(struct sock *sk); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 --- tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 8 +++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym; extern struct sock * bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym; -extern struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; - #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true); + struct mptcp_subflow_context *subflow; + + bpf_for_each(mptcp_subflow, subflow, msk) { + mptcp_subflow_set_scheduled(subflow, true); + break; + } + return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_bkup_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int nr = -1; - - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - struct mptcp_subflow_context *subflow; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + struct mptcp_subflow_context *subflow; + bpf_for_each(mptcp_subflow, subflow, msk) { if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) || !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) { - nr = i; + mptcp_subflow_set_scheduled(subflow, true); break; } } - if (nr != -1) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true); - return -1; - } return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_rr.c | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *next; struct mptcp_rr_storage *ptr; struct sock *last_snd = NULL; - int nr = 0; ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, return -1; last_snd = ptr->last_snd; + next = bpf_mptcp_subflow_ctx(msk->first); - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!last_snd || !subflow) + bpf_for_each(mptcp_subflow, subflow, msk) { + if (!last_snd) break; - if (mptcp_subflow_tcp_sock(subflow) == last_snd) { - if (i + 1 == MPTCP_SUBFLOWS_MAX || - !bpf_mptcp_subflow_ctx_by_pos(data, i + 1)) + if (bpf_mptcp_subflow_tcp_sock(subflow) == last_snd) { + subflow = bpf_iter_mptcp_subflow_next(&___it); + if (!subflow) break; - nr = i + 1; + next = subflow; break; } } - subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr); - if (!subflow) - return -1; - mptcp_subflow_set_scheduled(subflow, true); - ptr->last_snd = mptcp_subflow_tcp_sock(subflow); + mptcp_subflow_set_scheduled(next, true); + ptr->last_snd = bpf_mptcp_subflow_tcp_sock(next); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf_red.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_red_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - if (!bpf_mptcp_subflow_ctx_by_pos(data, i)) - break; + struct mptcp_subflow_context *subflow; - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true); - } + bpf_for_each(mptcp_subflow, subflow, msk) + mptcp_subflow_set_scheduled(subflow, true); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Drop mptcp_subflow_active declaration. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_burst.c | 78 +++++++++---------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ char _license[] SEC("license") = "GPL"; #define min(a, b) ((a) < (b) ? (a) : (b)) +#define SSK_MODE_ACTIVE 0 +#define SSK_MODE_BACKUP 1 +#define SSK_MODE_MAX 2 + struct bpf_subflow_send_info { __u8 subflow_id; __u64 linger_time; }; -extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym; extern void mptcp_set_timeout(struct sock *sk) __ksym; extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym; extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym; extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym; extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym; -#define SSK_MODE_ACTIVE 0 -#define SSK_MODE_BACKUP 1 -#define SSK_MODE_MAX 2 - static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor) { return dividend / divisor; @@ -XXX,XX +XXX,XX @@ static __always_inline bool sk_stream_memory_free(const struct sock *sk) return __sk_stream_memory_free(sk, 0); } +static struct mptcp_subflow_context * +mptcp_lookup_subflow_by_id(struct mptcp_sock *msk, unsigned int id) +{ + struct mptcp_subflow_context *subflow; + + bpf_for_each(mptcp_subflow, subflow, msk) { + if (subflow->subflow_id == id) + return subflow; + } + + return NULL; +} + SEC("struct_ops") void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk) { @@ -XXX,XX +XXX,XX @@ void 
BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk) { } -static int bpf_burst_get_send(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_send(struct mptcp_sock *msk) { struct bpf_subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, send_info[i].linger_time = -1; } - for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - bool backup; + bpf_for_each(mptcp_subflow, subflow, msk) { + bool backup = subflow->backup || subflow->request_bkup; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; - - backup = subflow->backup || subflow->request_bkup; - - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { - send_info[backup].subflow_id = i; + send_info[backup].subflow_id = subflow->subflow_id; send_info[backup].linger_time = linger_time; } } @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, if (!nr_active) send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id); + subflow = mptcp_lookup_subflow_by_id(msk, send_info[SSK_MODE_ACTIVE].subflow_id); if (!subflow) return -1; - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!ssk || !sk_stream_memory_free(ssk)) return -1; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, return 0; } -static int bpf_burst_get_retrans(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_retrans(struct mptcp_sock *msk) { - int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, 
subflow_id; + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - struct sock *ssk; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, msk) { + struct sock *ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; - ssk = mptcp_subflow_tcp_sock(subflow); /* still data outstanding at TCP level? skip this */ if (!tcp_rtx_and_write_queues_empty(ssk)) { mptcp_pm_subflow_chk_stale(msk, ssk); @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_retrans(struct mptcp_sock *msk, } if (subflow->backup || subflow->request_bkup) { - if (backup == MPTCP_SUBFLOWS_MAX) - backup = i; + if (!backup) + backup = ssk; continue; } - if (pick == MPTCP_SUBFLOWS_MAX) - pick = i; + if (!pick) + pick = ssk; } - if (pick < MPTCP_SUBFLOWS_MAX) { - subflow_id = pick; + if (pick) goto out; - } - subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; + pick = min_stale_count > 1 ? backup : NULL; out: - subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id); + if (!pick) + return -1; + subflow = bpf_mptcp_subflow_ctx(pick); if (!subflow) return -1; mptcp_subflow_set_scheduled(subflow, true); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { if (data->reinject) - return bpf_burst_get_retrans(msk, data); - return bpf_burst_get_send(msk, data); + return bpf_burst_get_retrans(msk); + return bpf_burst_get_send(msk); } SEC(".struct_ops") -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Now that the mptcp_subflow bpf_iter has been added, it's better to use the helper bpf_for_each(mptcp_subflow) to traverse all subflows on the conn_list of an MPTCP socket and then call a kfunc to modify the fields of each subflow in the WIP MPTCP BPF packet scheduler examples, instead of converting them to a fixed array. With this helper, we can get rid of the subflow array "contexts" and its size "subflows" in struct mptcp_sched_data. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- include/net/mptcp.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -XXX,XX +XXX,XX @@ struct mptcp_out_options { struct mptcp_sched_data { bool reinject; - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; }; struct mptcp_sched_ops { -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Drop ss_search() and has_bytes_sent(), add a new bpf program to check the bytes_sent. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/prog_tests/mptcp.c | 48 ++++++++++--------- .../selftests/bpf/progs/mptcp_bpf_bytes.c | 39 +++++++++++++++ 2 files changed, 65 insertions(+), 22 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" #include "mptcp_bpf_iters_subflow.skel.h" +#include "mptcp_bpf_bytes.skel.h" #include "mptcp_bpf_first.skel.h" #include "mptcp_bpf_bkup.skel.h" #include "mptcp_bpf_rr.skel.h" @@ -XXX,XX +XXX,XX @@ static struct nstoken *sched_init(char *flags, char *sched) return NULL; } -static int ss_search(char *src, char *dst, char *port, char *keyword) -{ - return SYS_NOFAIL("ip netns exec %s ss -enita src %s dst %s %s %d | grep -q '%s'", - NS_TEST, src, dst, port, PORT_1, keyword); -} - -static int has_bytes_sent(char *dst) -{ - return ss_search(ADDR_1, dst, "sport", "bytes_sent:"); -} - static void send_data_and_verify(char *sched, bool addr1, bool addr2) { + int server_fd, client_fd, err; + struct mptcp_bpf_bytes *skel; struct timespec start, end; - int server_fd, client_fd; unsigned int delta_ms; + skel = mptcp_bpf_bytes__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load: bytes")) + return; + + skel->bss->pid = getpid(); + + err = mptcp_bpf_bytes__attach(skel); + if (!ASSERT_OK(err, "skel_attach: bytes")) + goto skel_destroy; + server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); if (!ASSERT_OK_FD(server_fd, "start_mptcp_server")) - return; + goto skel_destroy; client_fd = connect_to_fd(server_fd, 0); if 
(!ASSERT_OK_FD(client_fd, "connect_to_fd")) - goto fail; + goto close_server; if (clock_gettime(CLOCK_MONOTONIC, &start) < 0) - goto fail; + goto close_client; if (!ASSERT_OK(send_recv_data(server_fd, client_fd, total_bytes), "send_recv_data")) - goto fail; + goto close_client; if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) - goto fail; + goto close_client; delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; printf("%s: %u ms\n", sched, delta_ms); if (addr1) - CHECK(has_bytes_sent(ADDR_1), sched, "should have bytes_sent on addr1\n"); + ASSERT_GT(skel->bss->bytes_sent_1, 0, "should have bytes_sent on addr1"); else - CHECK(!has_bytes_sent(ADDR_1), sched, "shouldn't have bytes_sent on addr1\n"); + ASSERT_EQ(skel->bss->bytes_sent_1, 0, "shouldn't have bytes_sent on addr1"); if (addr2) - CHECK(has_bytes_sent(ADDR_2), sched, "should have bytes_sent on addr2\n"); + ASSERT_GT(skel->bss->bytes_sent_2, 0, "should have bytes_sent on addr2"); else - CHECK(!has_bytes_sent(ADDR_2), sched, "shouldn't have bytes_sent on addr2\n"); + ASSERT_EQ(skel->bss->bytes_sent_2, 0, "shouldn't have bytes_sent on addr2"); +close_client: close(client_fd); -fail: +close_server: close(server_fd); +skel_destroy: + mptcp_bpf_bytes__destroy(skel); } static void test_default(void) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c @@ -XXX,XX +XXX,XX @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Kylin Software */ + +/* vmlinux.h, bpf_helpers.h and other 'define' */ +#include "bpf_tracing_net.h" +#include "mptcp_bpf.h" + +char _license[] SEC("license") = "GPL"; +u64 bytes_sent_1 = 0; +u64 bytes_sent_2 = 0; +int pid; + +SEC("fexit/mptcp_sched_get_send") +int BPF_PROG(trace_mptcp_sched_get_send, struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + + if 
(bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + if (!msk->pm.server_side) + return 0; + + mptcp_for_each_subflow(msk, subflow) { + struct tcp_sock *tp; + struct sock *ssk; + + subflow = bpf_core_cast(subflow, struct mptcp_subflow_context); + ssk = mptcp_subflow_tcp_sock(subflow); + tp = bpf_core_cast(ssk, struct tcp_sock); + + if (subflow->subflow_id == 1) + bytes_sent_1 = tp->bytes_sent; + else if (subflow->subflow_id == 2) + bytes_sent_2 = tp->bytes_sent; + } + + return 0; +} -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> v14: - patch 1, keep mptcp_sched_data_set_contexts() helper for future use. - patch 2, use "sk = sk__ign" in bpf_mptcp_subflow_ctx() and bpf_sk_stream_memory_free(). - patch 8, drop "subflows" from struct mptcp_sched_data too. Depends on: - Squash to "Add mptcp_subflow bpf_iter support", v2 Based-on: <cover.1738470660.git.tanggeliang@kylinos.cn> v13: - use '__ign' suffix to ignore the argument type checks of bpf_mptcp_subflow_ctx() and bpf_sk_stream_memory_free(), instead of adding a new helper bpf_mptcp_send_info_to_ssk(). - use 'bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk)' instead of using 'bpf_for_each(mptcp_subflow, subflow, msk)'. - keep struct mptcp_sched_data for future use. v12: - drop struct mptcp_sched_data. - rebased on "split get_subflow interface into two" v2. v11: If another squash-to patchset (Squash to "Add mptcp_subflow bpf_iter support") under review is merged before this set, v10 will fail to run. v11 fixes this issue and can run regardless of whether it is merged before or after the squash-to patchset. Compared with v10, only patches 3, 5, and 8 have been modified: - use mptcp_subflow_tcp_sock instead of bpf_mptcp_subflow_tcp_sock in patch 3 and patch 5. - drop bpf_mptcp_sched_kfunc_set, use bpf_mptcp_common_kfunc_set instead in patch 8. v10: - drop mptcp_subflow_set_scheduled() helper and WRITE_ONCE() in BPF. - add new bpf helper bpf_mptcp_send_info_to_ssk() for burst scheduler. v9: - merge 'Fixes for "use bpf_iter in bpf schedulers" v8' into this set. - rebased on "add netns helpers" v4 v8: - address Mat's comments in v7. - move sk_stream_memory_free check inside bpf_for_each() loop. - implement mptcp_subflow_set_scheduled helper in BPF. - add cleanup patches into this set again. v7: - move cleanup patches out of this set. - rebased. v6: - rebased to "add mptcp_subflow bpf_iter" v10 v5: - patch 2, drop mptcp_sock_type and mptcp_subflow_type. 
- patch 3, revert "bpf: Export more bpf_burst related functions" - patch 4, merge "bpf: Export more bpf_burst related functions" into it. v4: - patch 2, a new cleanup for "bpf: Add bpf_mptcp_sched_ops". - patch 3 should be reverted. - patch 8, register kfunc_set. v3: - rebased. - put the "drop has_bytes_sent" squash-to patch into this set. v2: - update bpf_rr and bpf_burst With the newly added mptcp_subflow bpf_iter, we can get rid of the subflow array "contexts" in struct mptcp_sched_data. This set uses the bpf_for_each(mptcp_subflow) helper to update all the BPF schedulers: bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) { ... ... mptcp_subflow_set_scheduled(subflow, true); } Geliang Tang (8): Squash to "mptcp: add sched_data helpers" Squash to "bpf: Export mptcp packet scheduler helpers" Squash to "selftests/bpf: Add bpf_first scheduler & test" Squash to "selftests/bpf: Add bpf_bkup scheduler & test" Squash to "selftests/bpf: Add bpf_rr scheduler & test" Squash to "selftests/bpf: Add bpf_red scheduler & test" Squash to "selftests/bpf: Add bpf_burst scheduler & test" mptcp: drop subflow contexts in mptcp_sched_data include/net/mptcp.h | 4 - net/mptcp/bpf.c | 50 ++++++------ net/mptcp/protocol.h | 2 - net/mptcp/sched.c | 15 ---- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 - .../selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +--- .../selftests/bpf/progs/mptcp_bpf_burst.c | 78 +++++++------------ .../selftests/bpf/progs/mptcp_bpf_first.c | 8 +- .../selftests/bpf/progs/mptcp_bpf_red.c | 8 +- .../selftests/bpf/progs/mptcp_bpf_rr.c | 31 ++++---- 10 files changed, 81 insertions(+), 134 deletions(-) -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Drop bpf_mptcp_subflow_ctx_by_pos. Keep mptcp_sched_data_set_contexts for future use. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 8 -------- net/mptcp/protocol.h | 2 -- net/mptcp/sched.c | 15 --------------- 3 files changed, 25 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) { } -__bpf_kfunc struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) -{ - if (pos >= MPTCP_SUBFLOWS_MAX) - return NULL; - return data->contexts[pos]; -} - __bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk) { return tcp_rtx_queue_empty(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); u64 mptcp_wnd_end(const struct mptcp_sock *msk); void mptcp_set_timeout(struct sock *sk); -struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; - int i = 0; - - mptcp_for_each_subflow(msk, subflow) { - if (i == MPTCP_SUBFLOWS_MAX) { - pr_warn_once("too many subflows"); - 
break; - } - mptcp_subflow_set_scheduled(subflow, false); - data->contexts[i++] = subflow; - } - data->subflows = i; - - for (; i < MPTCP_SUBFLOWS_MAX; i++) - data->contexts[i] = NULL; } int mptcp_sched_get_send(struct mptcp_sock *msk) -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Instead of adding a new BPF function bpf_mptcp_send_info_to_ssk() as in v12, this patch uses a much simpler approach: it adds the '__ign' suffix to the argument of bpf_mptcp_subflow_ctx() so that BPF ignores the type check of this argument. Remove bpf_mptcp_subflow_ctx_by_pos from BPF kfunc set. Drop bpf_mptcp_sched_kfunc_set, use bpf_mptcp_common_kfunc_set instead. Add new helpers bpf_mptcp_subflow_tcp_sock() and bpf_sk_stream_memory_free(). Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ struct bpf_iter_mptcp_subflow_kern { __bpf_kfunc_start_defs(); __bpf_kfunc static struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx(const struct sock *sk) +bpf_mptcp_subflow_ctx(const struct sock *sk__ign) { + const struct sock *sk = sk__ign; + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) return mptcp_subflow_ctx(sk); @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_ctx(const struct sock *sk) return NULL; } +__bpf_kfunc static struct sock * +bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) +{ + if (!subflow) + return NULL; + + return mptcp_subflow_tcp_sock(subflow); +} + __bpf_kfunc static int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, struct sock *sk) @@ -XXX,XX +XXX,XX @@ __bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk) return tcp_rtx_queue_empty(sk); } +__bpf_kfunc static bool bpf_sk_stream_memory_free(const struct sock *sk__ign) +{ + const struct sock *sk = sk__ign; + + if (sk && sk_fullsock(sk) && + sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + return sk_stream_memory_free(sk); + + return NULL; +} + __bpf_kfunc_end_defs(); 
BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) -BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) - -static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { - .owner = THIS_MODULE, - .set = &bpf_mptcp_common_kfunc_ids, -}; - -BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids) BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled) -BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos) BTF_ID_FLAGS(func, mptcp_subflow_active) BTF_ID_FLAGS(func, mptcp_set_timeout) BTF_ID_FLAGS(func, mptcp_wnd_end) -BTF_ID_FLAGS(func, tcp_stream_memory_free) +BTF_ID_FLAGS(func, bpf_sk_stream_memory_free, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_mptcp_subflow_queues_empty) BTF_ID_FLAGS(func, mptcp_pm_subflow_chk_stale, KF_SLEEPABLE) -BTF_KFUNCS_END(bpf_mptcp_sched_kfunc_ids) +BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) -static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = { +static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { .owner = THIS_MODULE, - .set = &bpf_mptcp_sched_kfunc_ids, + .set = &bpf_mptcp_common_kfunc_ids, }; static int __init bpf_mptcp_kfunc_init(void) @@ -XXX,XX +XXX,XX @@ static int __init bpf_mptcp_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCKOPT, &bpf_mptcp_common_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, - &bpf_mptcp_sched_kfunc_set); + &bpf_mptcp_common_kfunc_set); #ifdef CONFIG_BPF_JIT ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops); #endif -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use bpf_mptcp_subflow_ctx(msk->first) to get the first subflow instead of bpf_mptcp_subflow_ctx_by_pos(data, 0). Drop the bpf_mptcp_subflow_ctx_by_pos declaration. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 --- tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 8 +++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym; extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled) __ksym; -extern struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; - #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_first_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true); + struct mptcp_subflow_context *subflow; + + subflow = bpf_mptcp_subflow_ctx(msk->first); + if (!subflow) + return -1; + + mptcp_subflow_set_scheduled(subflow, true); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_bkup_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int nr = -1; - - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - struct mptcp_subflow_context *subflow; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + struct mptcp_subflow_context *subflow; + bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) { if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) || !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) { - nr = i; + mptcp_subflow_set_scheduled(subflow, true); break; } } - if (nr != -1) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true); - return -1; - } return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_rr.c | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_rr_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *next; struct mptcp_rr_storage *ptr; - struct sock *last_snd = NULL; - int nr = 0; ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) return -1; - last_snd = ptr->last_snd; + next = bpf_mptcp_subflow_ctx(msk->first); + if (!next) + return -1; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!last_snd || !subflow) - break; + if (!ptr->last_snd) + goto out; - if (mptcp_subflow_tcp_sock(subflow) == last_snd) { - if (i + 1 == MPTCP_SUBFLOWS_MAX || - !bpf_mptcp_subflow_ctx_by_pos(data, i + 1)) + bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) { + if (mptcp_subflow_tcp_sock(subflow) == ptr->last_snd) { + subflow = bpf_iter_mptcp_subflow_next(&___it); + if (!subflow) break; - nr = i + 1; + next = subflow; break; } } - subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr); - if (!subflow) - return -1; - mptcp_subflow_set_scheduled(subflow, true); - ptr->last_snd = mptcp_subflow_tcp_sock(subflow); +out: + mptcp_subflow_set_scheduled(next, true); + ptr->last_snd = mptcp_subflow_tcp_sock(next); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf_red.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_red_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - if (!bpf_mptcp_subflow_ctx_by_pos(data, i)) - break; + struct mptcp_subflow_context *subflow; - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true); - } + bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) + mptcp_subflow_set_scheduled(subflow, true); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Drop bpf_subflow_send_info, use subflow_send_info instead. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_burst.c | 78 +++++++------------ 1 file changed, 26 insertions(+), 52 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ char _license[] SEC("license") = "GPL"; #define min(a, b) ((a) < (b) ? (a) : (b)) -struct bpf_subflow_send_info { - __u8 subflow_id; - __u64 linger_time; -}; - extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym; extern void mptcp_set_timeout(struct sock *sk) __ksym; extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym; -extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym; +extern bool bpf_sk_stream_memory_free(const struct sock *sk) __ksym; extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym; extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym; @@ -XXX,XX +XXX,XX @@ static __always_inline bool tcp_rtx_and_write_queues_empty(struct sock *sk) return bpf_mptcp_subflow_queues_empty(sk) && tcp_write_queue_empty(sk); } -static __always_inline bool __sk_stream_memory_free(const struct sock *sk, int wake) -{ - if (sk->sk_wmem_queued >= sk->sk_sndbuf) - return false; - - return tcp_stream_memory_free(sk, wake); -} - -static __always_inline bool sk_stream_memory_free(const struct sock *sk) -{ - return __sk_stream_memory_free(sk, 0); -} - SEC("struct_ops") void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk) { @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk, struct mptcp_sched_data 
*data) { - struct bpf_subflow_send_info send_info[SSK_MODE_MAX]; + struct subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; __u32 pace, burst, wmem; @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk, /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < SSK_MODE_MAX; ++i) { - send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX; + send_info[i].ssk = NULL; send_info[i].linger_time = -1; } - for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - bool backup; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; - - backup = subflow->backup || subflow->request_bkup; + bpf_for_each(mptcp_subflow, subflow, sk) { + bool backup = subflow->backup || subflow->request_bkup; ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk, linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { - send_info[backup].subflow_id = i; + send_info[backup].ssk = ssk; send_info[backup].linger_time = linger_time; } } @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk, /* pick the best backup if no other subflow is active */ if (!nr_active) - send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id; + send_info[SSK_MODE_ACTIVE].ssk = send_info[SSK_MODE_BACKUP].ssk; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id); - if (!subflow) + ssk = send_info[SSK_MODE_ACTIVE].ssk; + if (!ssk || !bpf_sk_stream_memory_free(ssk)) return -1; - ssk = mptcp_subflow_tcp_sock(subflow); - if (!ssk || !sk_stream_memory_free(ssk)) + + subflow = bpf_mptcp_subflow_ctx(ssk); + if (!subflow) return -1; burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt); + ssk = bpf_core_cast(ssk, struct sock); wmem = ssk->sk_wmem_queued; if 
(!burst) goto out; @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_burst_get_retrans, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id; + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - struct sock *ssk; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) { + struct sock *ssk = bpf_mptcp_subflow_tcp_sock(subflow); - if (!mptcp_subflow_active(subflow)) + if (!ssk || !mptcp_subflow_active(subflow)) continue; - ssk = mptcp_subflow_tcp_sock(subflow); /* still data outstanding at TCP level? skip this */ if (!tcp_rtx_and_write_queues_empty(ssk)) { mptcp_pm_subflow_chk_stale(msk, ssk); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_retrans, struct mptcp_sock *msk, } if (subflow->backup || subflow->request_bkup) { - if (backup == MPTCP_SUBFLOWS_MAX) - backup = i; + if (!backup) + backup = ssk; continue; } - if (pick == MPTCP_SUBFLOWS_MAX) - pick = i; + if (!pick) + pick = ssk; } - if (pick < MPTCP_SUBFLOWS_MAX) { - subflow_id = pick; + if (pick) goto out; - } - subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; + pick = min_stale_count > 1 ? backup : NULL; out: - subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id); + if (!pick) + return -1; + subflow = bpf_mptcp_subflow_ctx(pick); if (!subflow) return -1; mptcp_subflow_set_scheduled(subflow, true); -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Now that the mptcp_subflow bpf_iter has been added, it is better to use the bpf_for_each(mptcp_subflow) helper to traverse all subflows on the conn_list of an MPTCP socket and then call kfuncs to modify the fields of each subflow in the WIP MPTCP BPF packet scheduler examples, instead of converting them to a fixed array. With this helper, we can get rid of the subflow array "contexts" and its size "subflows" in struct mptcp_sched_data. The now-empty struct mptcp_sched_data is kept for future use. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- include/net/mptcp.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -XXX,XX +XXX,XX @@ struct mptcp_out_options { #define MPTCP_SCHED_MAX 128 #define MPTCP_SCHED_BUF_MAX (MPTCP_SCHED_NAME_MAX * MPTCP_SCHED_MAX) -#define MPTCP_SUBFLOWS_MAX 8 - struct mptcp_sched_data { - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; }; struct mptcp_sched_ops { -- 2.43.0