:p
atchew
Login
From: Geliang Tang <tanggeliang@kylinos.cn> v4: - patch 2, a new cleanup for "bpf: Add bpf_mptcp_sched_ops". - patch 3 should be reverted. - patch 8, register kfunc_set. v3: - rebased. - put the "drop has_bytes_sent" squash-to patch into this set. v2: - update bpf_rr and bpf_burst With the newly added mptcp_subflow bpf_iter, we can get rid of the subflows array "contexts" in struct mptcp_sched_data. This set uses the bpf_for_each(mptcp_subflow) helper to update all the BPF schedulers: bpf_for_each(mptcp_subflow, subflow, msk) { ... ... mptcp_subflow_set_scheduled(subflow, true); } Depends on: - "add mptcp_subflow bpf_iter" v9 Based-on: <cover.1728466623.git.tanggeliang@kylinos.cn> Geliang Tang (11): Revert "mptcp: add sched_data helpers" Squash to "bpf: Add bpf_mptcp_sched_ops" Revert "bpf: Add bpf_mptcp_sched_kfunc_set" Squash to "selftests/bpf: Add bpf_first scheduler & test" Squash to "selftests/bpf: Add bpf_bkup scheduler & test" Squash to "selftests/bpf: Add bpf_rr scheduler & test" Squash to "selftests/bpf: Add bpf_red scheduler & test" Squash to "bpf: Export more bpf_burst related functions" Squash to "selftests/bpf: Add bpf_burst scheduler & test" mptcp: drop subflow contexts in mptcp_sched_data Squash to "selftests/bpf: Add bpf scheduler test" - drop has_bytes_sent include/net/mptcp.h | 2 - net/mptcp/bpf.c | 14 +--- net/mptcp/protocol.h | 3 - net/mptcp/sched.c | 22 ------ .../testing/selftests/bpf/prog_tests/mptcp.c | 48 ++++++------ tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 - .../selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +--- .../selftests/bpf/progs/mptcp_bpf_burst.c | 78 +++++++++---------- .../selftests/bpf/progs/mptcp_bpf_bytes.c | 39 ++++++++++ .../selftests/bpf/progs/mptcp_bpf_first.c | 8 +- .../selftests/bpf/progs/mptcp_bpf_red.c | 8 +- .../selftests/bpf/progs/mptcp_bpf_rr.c | 24 +++--- 12 files changed, 131 insertions(+), 134 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Drop this patch. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 8 -------- net/mptcp/protocol.h | 2 -- net/mptcp/sched.c | 22 ---------------------- 3 files changed, 32 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ __bpf_kfunc void bpf_mptcp_sock_release(struct mptcp_sock *msk) WARN_ON_ONCE(!sk || !refcount_dec_not_one(&sk->sk_refcnt)); } -__bpf_kfunc struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) -{ - if (pos >= MPTCP_SUBFLOWS_MAX) - return NULL; - return data->contexts[pos]; -} - __bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk) { return tcp_rtx_queue_empty(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent); u64 mptcp_wnd_end(const struct mptcp_sock *msk); void mptcp_set_timeout(struct sock *sk); bool bpf_mptcp_subflow_queues_empty(struct sock *sk); -struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, WRITE_ONCE(subflow->scheduled, scheduled); } -static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, - struct mptcp_sched_data *data) -{ - struct mptcp_subflow_context *subflow; - int i = 0; - - mptcp_for_each_subflow(msk, subflow) { - if (i == MPTCP_SUBFLOWS_MAX) { - pr_warn_once("too 
many subflows"); - break; - } - mptcp_subflow_set_scheduled(subflow, false); - data->contexts[i++] = subflow; - } - data->subflows = i; - - for (; i < MPTCP_SUBFLOWS_MAX; i++) - data->contexts[i] = NULL; -} - int mptcp_sched_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_send(struct mptcp_sock *msk) data.reinject = false; if (msk->sched == &mptcp_sched_default || !msk->sched) return mptcp_sched_default_get_subflow(msk, &data); - mptcp_sched_data_set_contexts(msk, &data); return msk->sched->get_subflow(msk, &data); } @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) data.reinject = true; if (msk->sched == &mptcp_sched_default || !msk->sched) return mptcp_sched_default_get_subflow(msk, &data); - mptcp_sched_data_set_contexts(msk, &data); return msk->sched->get_subflow(msk, &data); } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Please update the subject to bpf: Add mptcp packet scheduler struct_ops A small cleanup. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ #ifdef CONFIG_BPF_JIT static struct bpf_struct_ops bpf_mptcp_sched_ops; -static const struct btf_type *mptcp_sock_type, *mptcp_subflow_type __read_mostly; +static const struct btf_type *mptcp_sock_type, + *mptcp_subflow_type __read_mostly; static u32 mptcp_sock_id, mptcp_subflow_id; +/* MPTCP BPF packet scheduler */ + static const struct bpf_func_proto * bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Remove bpf_mptcp_subflow_ctx_by_pos from kfunc_set, it becomes empty now. Drop this patch. It should be registered in the commit "bpf: Export more bpf_burst related functions" Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { .set = &bpf_mptcp_common_kfunc_ids, }; -BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids) -BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos) +/* BTF_ID_FLAGS(func, mptcp_set_timeout) BTF_ID_FLAGS(func, mptcp_wnd_end) BTF_ID_FLAGS(func, tcp_stream_memory_free) BTF_ID_FLAGS(func, bpf_mptcp_subflow_queues_empty) BTF_ID_FLAGS(func, mptcp_pm_subflow_chk_stale) -BTF_KFUNCS_END(bpf_mptcp_sched_kfunc_ids) - -static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = { - .owner = THIS_MODULE, - .set = &bpf_mptcp_sched_kfunc_ids, -}; +*/ static int __init bpf_mptcp_kfunc_init(void) { @@ -XXX,XX +XXX,XX @@ static int __init bpf_mptcp_kfunc_init(void) ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_mptcp_common_kfunc_set); - ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, - &bpf_mptcp_sched_kfunc_set); #ifdef CONFIG_BPF_JIT ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops); #endif -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 --- tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 8 +++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym; extern struct sock * bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym; -extern struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; - #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true); + struct mptcp_subflow_context *subflow; + + bpf_for_each(mptcp_subflow, subflow, msk) { + mptcp_subflow_set_scheduled(subflow, true); + break; + } + return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_bkup_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int nr = -1; - - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - struct mptcp_subflow_context *subflow; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + struct mptcp_subflow_context *subflow; + bpf_for_each(mptcp_subflow, subflow, msk) { if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) || !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) { - nr = i; + mptcp_subflow_set_scheduled(subflow, true); break; } } - if (nr != -1) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true); - return -1; - } return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_rr.c | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *next; struct mptcp_rr_storage *ptr; struct sock *last_snd = NULL; - int nr = 0; ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, return -1; last_snd = ptr->last_snd; + next = bpf_mptcp_subflow_ctx(msk->first); - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!last_snd || !subflow) + bpf_for_each(mptcp_subflow, subflow, msk) { + if (!last_snd) break; - if (mptcp_subflow_tcp_sock(subflow) == last_snd) { - if (i + 1 == MPTCP_SUBFLOWS_MAX || - !bpf_mptcp_subflow_ctx_by_pos(data, i + 1)) + if (bpf_mptcp_subflow_tcp_sock(subflow) == last_snd) { + subflow = bpf_iter_mptcp_subflow_next(&___it); + if (!subflow) break; - nr = i + 1; + next = subflow; break; } } - subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr); - if (!subflow) - return -1; - mptcp_subflow_set_scheduled(subflow, true); - ptr->last_snd = mptcp_subflow_tcp_sock(subflow); + mptcp_subflow_set_scheduled(next, true); + ptr->last_snd = bpf_mptcp_subflow_tcp_sock(next); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf_red.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_red_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - if (!bpf_mptcp_subflow_ctx_by_pos(data, i)) - break; + struct mptcp_subflow_context *subflow; - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true); - } + bpf_for_each(mptcp_subflow, subflow, msk) + mptcp_subflow_set_scheduled(subflow, true); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Drop bpf_mptcp_subflow_queues_empty declaration. Register kfunc set again. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 11 +++++++++-- net/mptcp/protocol.h | 1 - 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { .set = &bpf_mptcp_common_kfunc_ids, }; -/* +BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids) BTF_ID_FLAGS(func, mptcp_set_timeout) BTF_ID_FLAGS(func, mptcp_wnd_end) BTF_ID_FLAGS(func, tcp_stream_memory_free) BTF_ID_FLAGS(func, bpf_mptcp_subflow_queues_empty) BTF_ID_FLAGS(func, mptcp_pm_subflow_chk_stale) -*/ +BTF_KFUNCS_END(bpf_mptcp_sched_kfunc_ids) + +static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_mptcp_sched_kfunc_ids, +}; static int __init bpf_mptcp_kfunc_init(void) { @@ -XXX,XX +XXX,XX @@ static int __init bpf_mptcp_kfunc_init(void) ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_mptcp_common_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, + &bpf_mptcp_sched_kfunc_set); #ifdef CONFIG_BPF_JIT ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops); #endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); u64 mptcp_wnd_end(const struct mptcp_sock *msk); void mptcp_set_timeout(struct sock *sk); -bool bpf_mptcp_subflow_queues_empty(struct sock *sk); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void 
mptcp_cancel_work(struct sock *sk); -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Drop mptcp_subflow_active declaration. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_burst.c | 78 +++++++++---------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ char _license[] SEC("license") = "GPL"; #define min(a, b) ((a) < (b) ? (a) : (b)) +#define SSK_MODE_ACTIVE 0 +#define SSK_MODE_BACKUP 1 +#define SSK_MODE_MAX 2 + struct bpf_subflow_send_info { __u8 subflow_id; __u64 linger_time; }; -extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym; extern void mptcp_set_timeout(struct sock *sk) __ksym; extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym; extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym; extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym; extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym; -#define SSK_MODE_ACTIVE 0 -#define SSK_MODE_BACKUP 1 -#define SSK_MODE_MAX 2 - static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor) { return dividend / divisor; @@ -XXX,XX +XXX,XX @@ static __always_inline bool sk_stream_memory_free(const struct sock *sk) return __sk_stream_memory_free(sk, 0); } +static struct mptcp_subflow_context * +mptcp_lookup_subflow_by_id(struct mptcp_sock *msk, unsigned int id) +{ + struct mptcp_subflow_context *subflow; + + bpf_for_each(mptcp_subflow, subflow, msk) { + if (subflow->subflow_id == id) + return subflow; + } + + return NULL; +} + SEC("struct_ops") void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk) { @@ -XXX,XX +XXX,XX @@ void 
BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk) { } -static int bpf_burst_get_send(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_send(struct mptcp_sock *msk) { struct bpf_subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, send_info[i].linger_time = -1; } - for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - bool backup; + bpf_for_each(mptcp_subflow, subflow, msk) { + bool backup = subflow->backup || subflow->request_bkup; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; - - backup = subflow->backup || subflow->request_bkup; - - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { - send_info[backup].subflow_id = i; + send_info[backup].subflow_id = subflow->subflow_id; send_info[backup].linger_time = linger_time; } } @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, if (!nr_active) send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id); + subflow = mptcp_lookup_subflow_by_id(msk, send_info[SSK_MODE_ACTIVE].subflow_id); if (!subflow) return -1; - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!ssk || !sk_stream_memory_free(ssk)) return -1; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, return 0; } -static int bpf_burst_get_retrans(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_retrans(struct mptcp_sock *msk) { - int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, 
subflow_id; + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - struct sock *ssk; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, msk) { + struct sock *ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; - ssk = mptcp_subflow_tcp_sock(subflow); /* still data outstanding at TCP level? skip this */ if (!tcp_rtx_and_write_queues_empty(ssk)) { mptcp_pm_subflow_chk_stale(msk, ssk); @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_retrans(struct mptcp_sock *msk, } if (subflow->backup || subflow->request_bkup) { - if (backup == MPTCP_SUBFLOWS_MAX) - backup = i; + if (!backup) + backup = ssk; continue; } - if (pick == MPTCP_SUBFLOWS_MAX) - pick = i; + if (!pick) + pick = ssk; } - if (pick < MPTCP_SUBFLOWS_MAX) { - subflow_id = pick; + if (pick) goto out; - } - subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; + pick = min_stale_count > 1 ? backup : NULL; out: - subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id); + if (!pick) + return -1; + subflow = bpf_mptcp_subflow_ctx(pick); if (!subflow) return -1; mptcp_subflow_set_scheduled(subflow, true); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { if (data->reinject) - return bpf_burst_get_retrans(msk, data); - return bpf_burst_get_send(msk, data); + return bpf_burst_get_retrans(msk); + return bpf_burst_get_send(msk); } SEC(".struct_ops") -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Now that the mptcp_subflow bpf_iter has been added, it's better to use the bpf_for_each(mptcp_subflow) helper to traverse all subflows on the conn_list of an MPTCP socket, and then call a kfunc to modify the fields of each subflow in the WIP MPTCP BPF packet scheduler examples, instead of converting them to a fixed array. With this helper, we can get rid of the subflow array "contexts" and its size "subflows" in struct mptcp_sched_data. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- include/net/mptcp.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -XXX,XX +XXX,XX @@ struct mptcp_out_options { struct mptcp_sched_data { bool reinject; - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; }; struct mptcp_sched_ops { -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Drop ss_search() and has_bytes_sent(), add a new bpf program to check the bytes_sent. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/prog_tests/mptcp.c | 48 ++++++++++--------- .../selftests/bpf/progs/mptcp_bpf_bytes.c | 39 +++++++++++++++ 2 files changed, 65 insertions(+), 22 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" #include "mptcp_bpf_iters_subflow.skel.h" +#include "mptcp_bpf_bytes.skel.h" #include "mptcp_bpf_first.skel.h" #include "mptcp_bpf_bkup.skel.h" #include "mptcp_bpf_rr.skel.h" @@ -XXX,XX +XXX,XX @@ static struct nstoken *sched_init(char *flags, char *sched) return NULL; } -static int ss_search(char *src, char *dst, char *port, char *keyword) -{ - return SYS_NOFAIL("ip netns exec %s ss -enita src %s dst %s %s %d | grep -q '%s'", - NS_TEST, src, dst, port, PORT_1, keyword); -} - -static int has_bytes_sent(char *dst) -{ - return ss_search(ADDR_1, dst, "sport", "bytes_sent:"); -} - static void send_data_and_verify(char *sched, bool addr1, bool addr2) { + int server_fd, client_fd, err; + struct mptcp_bpf_bytes *skel; struct timespec start, end; - int server_fd, client_fd; unsigned int delta_ms; + skel = mptcp_bpf_bytes__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load: bytes")) + return; + + skel->bss->pid = getpid(); + + err = mptcp_bpf_bytes__attach(skel); + if (!ASSERT_OK(err, "skel_attach: bytes")) + goto skel_destroy; + server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); if (!ASSERT_OK_FD(server_fd, "start_mptcp_server")) - return; + goto skel_destroy; client_fd = connect_to_fd(server_fd, 0); if 
(!ASSERT_OK_FD(client_fd, "connect_to_fd")) - goto fail; + goto close_server; if (clock_gettime(CLOCK_MONOTONIC, &start) < 0) - goto fail; + goto close_client; if (!ASSERT_OK(send_recv_data(server_fd, client_fd, total_bytes), "send_recv_data")) - goto fail; + goto close_client; if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) - goto fail; + goto close_client; delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; printf("%s: %u ms\n", sched, delta_ms); if (addr1) - CHECK(has_bytes_sent(ADDR_1), sched, "should have bytes_sent on addr1\n"); + ASSERT_GT(skel->bss->bytes_sent_1, 0, "should have bytes_sent on addr1"); else - CHECK(!has_bytes_sent(ADDR_1), sched, "shouldn't have bytes_sent on addr1\n"); + ASSERT_EQ(skel->bss->bytes_sent_1, 0, "shouldn't have bytes_sent on addr1"); if (addr2) - CHECK(has_bytes_sent(ADDR_2), sched, "should have bytes_sent on addr2\n"); + ASSERT_GT(skel->bss->bytes_sent_2, 0, "should have bytes_sent on addr2"); else - CHECK(!has_bytes_sent(ADDR_2), sched, "shouldn't have bytes_sent on addr2\n"); + ASSERT_EQ(skel->bss->bytes_sent_2, 0, "shouldn't have bytes_sent on addr2"); +close_client: close(client_fd); -fail: +close_server: close(server_fd); +skel_destroy: + mptcp_bpf_bytes__destroy(skel); } static void test_default(void) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bytes.c @@ -XXX,XX +XXX,XX @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Kylin Software */ + +/* vmlinux.h, bpf_helpers.h and other 'define' */ +#include "bpf_tracing_net.h" +#include "mptcp_bpf.h" + +char _license[] SEC("license") = "GPL"; +u64 bytes_sent_1 = 0; +u64 bytes_sent_2 = 0; +int pid; + +SEC("fexit/mptcp_sched_get_send") +int BPF_PROG(trace_mptcp_sched_get_send, struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + + if 
(bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + if (!msk->pm.server_side) + return 0; + + mptcp_for_each_subflow(msk, subflow) { + struct tcp_sock *tp; + struct sock *ssk; + + subflow = bpf_core_cast(subflow, struct mptcp_subflow_context); + ssk = mptcp_subflow_tcp_sock(subflow); + tp = bpf_core_cast(ssk, struct tcp_sock); + + if (subflow->subflow_id == 1) + bytes_sent_1 = tp->bytes_sent; + else if (subflow->subflow_id == 2) + bytes_sent_2 = tp->bytes_sent; + } + + return 0; +} -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> v8: - address Mat's comments in v7. - move sk_stream_memory_free check inside bpf_for_each() loop. - implement mptcp_subflow_set_scheduled helper in BPF. - add cleanup patches into this set again. v7: - move cleanup patches out of this set. - rebased. v6: - rebased to "add mptcp_subflow bpf_iter" v10 v5: - patch 2, drop mptcp_sock_type and mptcp_subflow_type. - patch 3, revert "bpf: Export more bpf_burst related functions" - patch 4, merge "bpf: Export more bpf_burst related functions" into it. v4: - patch 2, a new cleanup for "bpf: Add bpf_mptcp_sched_ops". - patch 3 should be reverted. - patch 8, register kfunc_set. v3: - rebased. - put the "drop has_bytes_sent" squash-to patch into this set. v2: - update bpf_rr and bpf_burst With the newly added mptcp_subflow bpf_iter, we can get rid of the subflows array "contexts" in struct mptcp_sched_data. This set uses the bpf_for_each(mptcp_subflow) helper to update all the BPF schedulers: bpf_for_each(mptcp_subflow, subflow, msk) { ... ... 
mptcp_subflow_set_scheduled(subflow, true); } Geliang Tang (13): mptcp: check sk_stream_memory_free in loop mptcp: make mptcp_subflow_set_scheduled inline mptcp: add mptcp_validate_scheduler helper Squash to "bpf: Add bpf_mptcp_sched_ops" Squash to "selftests/bpf: Add bpf scheduler test" Squash to "selftests/bpf: Add bpf_first scheduler & test" Squash to "selftests/bpf: Add bpf_bkup scheduler & test" Squash to "selftests/bpf: Add bpf_rr scheduler & test" Squash to "selftests/bpf: Add bpf_red scheduler & test" Squash to "selftests/bpf: Add bpf_burst scheduler & test" Squash to "bpf: Export mptcp packet scheduler helpers" Revert "mptcp: add sched_data helpers" mptcp: drop subflow contexts in mptcp_sched_data include/net/mptcp.h | 2 - net/mptcp/bpf.c | 45 +++++------ net/mptcp/protocol.c | 5 +- net/mptcp/protocol.h | 12 ++- net/mptcp/sched.c | 45 ++++------- .../testing/selftests/bpf/prog_tests/mptcp.c | 45 +++++------ tools/testing/selftests/bpf/progs/mptcp_bpf.h | 14 ++-- .../selftests/bpf/progs/mptcp_bpf_bkup.c | 18 +---- .../selftests/bpf/progs/mptcp_bpf_burst.c | 74 ++++++++----------- .../selftests/bpf/progs/mptcp_bpf_first.c | 4 +- .../selftests/bpf/progs/mptcp_bpf_red.c | 10 +-- .../selftests/bpf/progs/mptcp_bpf_rr.c | 33 ++++----- 12 files changed, 131 insertions(+), 176 deletions(-) -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> To make it easier to implement logic similar to the burst scheduler in a BPF program, the sk_stream_memory_free() check is moved forward into the mptcp_for_each_subflow loop. An assignment like "ssk = send_info[SSK_MODE_ACTIVE].ssk" is not allowed in BPF, so bpf_core_cast() is used to cast it. But a cast pointer can't be passed to a kfunc (sk_stream_memory_free). To solve this, in the BPF burst scheduler the sk_stream_memory_free() check is moved forward to the position of mptcp_subflow_active() in the bpf_for_each() loop. To keep mptcp_subflow_get_send() and the BPF burst scheduler code consistent, this patch also applies the same change to mptcp_subflow_get_send(). Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/protocol.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) trace_mptcp_subflow_get_send(subflow); ssk = mptcp_subflow_tcp_sock(subflow); - if (!mptcp_subflow_active(subflow)) + if (!mptcp_subflow_active(subflow) || + !sk_stream_memory_free(ssk)) continue; tout = max(tout, mptcp_timeout_from_subflow(subflow)); @@ -XXX,XX +XXX,XX @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) * to check that subflow has a non empty cwin. */ ssk = send_info[SSK_MODE_ACTIVE].ssk; - if (!ssk || !sk_stream_memory_free(ssk)) + if (!ssk) return NULL; burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt); -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> mptcp_subflow_set_scheduled() is no longer called from BPF programs; it is now used only inside the kernel. A separate mptcp_subflow_set_scheduled helper has been implemented on the BPF side for use by BPF programs. To avoid compilation errors due to duplicate names, the kernel function has been made static inline and moved to protocol.h. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/protocol.h | 9 +++++++-- net/mptcp/sched.c | 6 ------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_sched_init(void); int mptcp_init_sched(struct mptcp_sock *msk, struct mptcp_sched_ops *sched); void mptcp_release_sched(struct mptcp_sock *msk); -void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, - bool scheduled); struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk); struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); int mptcp_sched_get_send(struct mptcp_sock *msk); int mptcp_sched_get_retrans(struct mptcp_sock *msk); +static inline void +mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, + bool scheduled) +{ + WRITE_ONCE(subflow->scheduled, scheduled); +} + static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) { return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ void mptcp_release_sched(struct mptcp_sock *msk) bpf_module_put(sched, sched->owner); } -void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, - bool scheduled) -{ - WRITE_ONCE(subflow->scheduled, scheduled); -} - static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, struct mptcp_sched_data *data) { -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> A new .validate interface was recently added to bpf_struct_ops; implement it as mptcp_validate_scheduler() for struct mptcp_sched_ops. It checks whether the required op "get_subflow" of struct mptcp_sched_ops is implemented. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/protocol.h | 1 + net/mptcp/sched.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); struct mptcp_sched_ops *mptcp_sched_find(const char *name); +int mptcp_validate_scheduler(struct mptcp_sched_ops *sched); int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); void mptcp_sched_init(void); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen) rcu_read_unlock(); } -int mptcp_register_scheduler(struct mptcp_sched_ops *sched) +int mptcp_validate_scheduler(struct mptcp_sched_ops *sched) { - if (!sched->get_subflow) + if (!sched->get_subflow) { + pr_err("%s does not implement required ops\n", sched->name); return -EINVAL; + } + + return 0; +} + +int mptcp_register_scheduler(struct mptcp_sched_ops *sched) +{ + int ret; + + ret = mptcp_validate_scheduler(sched); + if (ret) + return ret; spin_lock(&mptcp_sched_list_lock); if (mptcp_sched_find(sched->name)) { -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Please update the subject to bpf: Add mptcp packet scheduler struct_ops 1. validate interface is added in bpf_struct_ops by commit 68b04864ca42 ("bpf: Create links for BPF struct_ops maps."), implement it in mptcp_sched_ops. 2. Drop mptcp_sched_find. This part of mptcp_sched_find() code comes from bpf_tcp_ca_init_member, but it was recently deleted by commit 68b04864ca42. 3. Add write access for scheduled of mptcp_subflow_context. 4. Drop mptcp_sock_type and mptcp_subflow_type. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ #ifdef CONFIG_BPF_JIT static struct bpf_struct_ops bpf_mptcp_sched_ops; -static const struct btf_type *mptcp_sock_type, *mptcp_subflow_type __read_mostly; -static u32 mptcp_sock_id, mptcp_subflow_id; +static u32 mptcp_sock_id, + mptcp_subflow_id; + +/* MPTCP BPF packet scheduler */ static const struct bpf_func_proto * bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id, @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size) { - const struct btf_type *t; + u32 id = reg->btf_id; size_t end; - t = btf_type_by_id(reg->btf, reg->btf_id); - - if (t == mptcp_sock_type) { + if (id == mptcp_sock_id) { switch (off) { case offsetof(struct mptcp_sock, snd_burst): end = offsetofend(struct mptcp_sock, snd_burst); @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, off); return -EACCES; } - } else if (t == mptcp_subflow_type) { + } else if (id == mptcp_subflow_id) { switch (off) { case offsetof(struct mptcp_subflow_context, avg_pacing_rate): end = offsetofend(struct mptcp_subflow_context, avg_pacing_rate); break; + case 
offsetof(struct mptcp_subflow_context, scheduled): + end = offsetofend(struct mptcp_subflow_context, scheduled); + break; default: bpf_log(log, "no write support to mptcp_subflow_context at off %d\n", off); @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, if (off + size > end) { bpf_log(log, "access beyond %s at off %u size %u ended at %zu", - t == mptcp_sock_type ? "mptcp_sock" : "mptcp_subflow_context", + id == mptcp_sock_id ? "mptcp_sock" : "mptcp_subflow_context", off, size, end); return -EACCES; } @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_init_member(const struct btf_type *t, const struct mptcp_sched_ops *usched; struct mptcp_sched_ops *sched; u32 moff; - int ret; usched = (const struct mptcp_sched_ops *)udata; sched = (struct mptcp_sched_ops *)kdata; @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_init_member(const struct btf_type *t, if (bpf_obj_name_cpy(sched->name, usched->name, sizeof(sched->name)) <= 0) return -EINVAL; - - rcu_read_lock(); - ret = mptcp_sched_find(usched->name) ? 
-EEXIST : 1; - rcu_read_unlock(); - - return ret; + return 1; } return 0; @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_init(struct btf *btf) if (type_id < 0) return -EINVAL; mptcp_sock_id = type_id; - mptcp_sock_type = btf_type_by_id(btf, mptcp_sock_id); type_id = btf_find_by_name_kind(btf, "mptcp_subflow_context", BTF_KIND_STRUCT); if (type_id < 0) return -EINVAL; mptcp_subflow_id = type_id; - mptcp_subflow_type = btf_type_by_id(btf, mptcp_subflow_id); return 0; } +static int bpf_mptcp_sched_validate(void *kdata) +{ + return mptcp_validate_scheduler(kdata); +} + static int __bpf_mptcp_sched_get_subflow(struct mptcp_sock *msk, struct mptcp_sched_data *data) { @@ -XXX,XX +XXX,XX @@ static struct bpf_struct_ops bpf_mptcp_sched_ops = { .check_member = bpf_mptcp_sched_check_member, .init_member = bpf_mptcp_sched_init_member, .init = bpf_mptcp_sched_init, + .validate = bpf_mptcp_sched_validate, .name = "mptcp_sched_ops", .cfi_stubs = &__bpf_mptcp_sched_ops, }; -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> A cleanup, move create_netns() out of sched_init(). Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/prog_tests/mptcp.c | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ static void test_iters_subflow(void) close(cgroup_fd); } -static struct nstoken *sched_init(char *flags, char *sched) +static int sched_init(char *flags, char *sched) { - struct nstoken *nstoken; - - nstoken = create_netns(); - if (!ASSERT_OK_PTR(nstoken, "create_netns")) - return NULL; - - if (endpoint_init("subflow", 2) < 0) + if (endpoint_init(flags, 2) < 0) goto fail; SYS(fail, "ip netns exec %s sysctl -qw net.mptcp.scheduler=%s", NS_TEST, sched); - return nstoken; + return 0; fail: - cleanup_netns(nstoken); - return NULL; + return -1; } static int ss_search(char *src, char *dst, char *port, char *keyword) @@ -XXX,XX +XXX,XX @@ static void send_data_and_verify(char *sched, bool addr1, bool addr2) static void test_default(void) { struct nstoken *nstoken; + int err; - nstoken = sched_init("subflow", "default"); + nstoken = create_netns(); if (!nstoken) goto fail; + err = sched_init("subflow", "default"); + if (!ASSERT_OK(err, "sched_init")) + goto fail; + send_data_and_verify("default", WITH_DATA, WITH_DATA); fail: -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> 1. Update sched_init. 2. To drop bpf_object__find_map_by_name() in test_bpf_sched(), change its first parameter to a bpf_map. 3. Implement mptcp_subflow_set_scheduled in BPF. 4. Drop bpf_mptcp_subflow_ctx_by_pos. 5. Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 15 +++++++++------ tools/testing/selftests/bpf/progs/mptcp_bpf.h | 14 ++++++++------ .../testing/selftests/bpf/progs/mptcp_bpf_first.c | 4 ++-- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ static void test_default(void) cleanup_netns(nstoken); } -static void test_bpf_sched(struct bpf_object *obj, char *sched, +static void test_bpf_sched(struct bpf_map *map, char *sched, bool addr1, bool addr2) { char bpf_sched[MPTCP_SCHED_NAME_MAX] = "bpf_"; struct nstoken *nstoken; struct bpf_link *link; - struct bpf_map *map; + int err; if (!ASSERT_LT(strlen(bpf_sched) + strlen(sched), MPTCP_SCHED_NAME_MAX, "Scheduler name too long")) return; - map = bpf_object__find_map_by_name(obj, sched); link = bpf_map__attach_struct_ops(map); - if (CHECK(!link, sched, "attach_struct_ops: %d\n", errno)) + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) return; - nstoken = sched_init("subflow", strcat(bpf_sched, sched)); + nstoken = create_netns(); if (!nstoken) goto fail; + err = sched_init("subflow", strcat(bpf_sched, sched)); + if (!ASSERT_OK(err, "sched_init")) + goto fail; + send_data_and_verify(sched, addr1, addr2); fail: @@ -XXX,XX +XXX,XX @@ static void test_first(void) if (!ASSERT_OK_PTR(skel, "open_and_load: first")) return; - test_bpf_sched(skel->obj, "first", WITH_DATA, WITHOUT_DATA); + 
test_bpf_sched(skel->maps.first, "first", WITH_DATA, WITHOUT_DATA); mptcp_bpf_first__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -XXX,XX +XXX,XX @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) return subflow->tcp_sock; } +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) + +static __always_inline void +mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled) +{ + WRITE_ONCE(subflow->scheduled, scheduled); +} + /* ksym */ extern struct mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) __ksym; extern void bpf_mptcp_sock_release(struct mptcp_sock *msk) __ksym; @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym; extern struct sock * bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym; -extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, - bool scheduled) __ksym; - -extern struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; - #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true); + mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx(msk->first), true); return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops first = { .init = (void *)mptcp_sched_first_init, .release = (void 
*)mptcp_sched_first_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Update test_bpf_sched(). Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 2 +- .../selftests/bpf/progs/mptcp_bpf_bkup.c | 18 ++++-------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ static void test_bkup(void) if (!ASSERT_OK_PTR(skel, "open_and_load: bkup")) return; - test_bpf_sched(skel->obj, "bkup", WITH_DATA, WITHOUT_DATA); + test_bpf_sched(skel->maps.bkup, "bkup", WITH_DATA, WITHOUT_DATA); mptcp_bpf_bkup__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_bkup_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int nr = -1; - - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - struct mptcp_subflow_context *subflow; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + struct mptcp_subflow_context *subflow; + bpf_for_each(mptcp_subflow, subflow, msk) { if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) || !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) { - nr = i; + mptcp_subflow_set_scheduled(subflow, true); break; } } - if (nr != -1) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true); - return -1; - } return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops bkup = { .init = (void *)mptcp_sched_bkup_init, .release = (void 
*)mptcp_sched_bkup_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Update test_bpf_sched(). Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/prog_tests/mptcp.c | 2 +- .../selftests/bpf/progs/mptcp_bpf_rr.c | 33 ++++++++----------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ static void test_rr(void) if (!ASSERT_OK_PTR(skel, "open_and_load: rr")) return; - test_bpf_sched(skel->obj, "rr", WITH_DATA, WITH_DATA); + test_bpf_sched(skel->maps.rr, "rr", WITH_DATA, WITH_DATA); mptcp_bpf_rr__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *next; struct mptcp_rr_storage *ptr; - struct sock *last_snd = NULL; - int nr = 0; ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) return -1; - last_snd = ptr->last_snd; + next = bpf_mptcp_subflow_ctx(msk->first); + if (!ptr->last_snd) + goto out; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!last_snd || !subflow) - break; - - if (mptcp_subflow_tcp_sock(subflow) == last_snd) { - if (i + 1 == MPTCP_SUBFLOWS_MAX || - !bpf_mptcp_subflow_ctx_by_pos(data, i + 1)) + bpf_for_each(mptcp_subflow, subflow, msk) { + if (mptcp_subflow_tcp_sock(subflow) == 
ptr->last_snd) { + subflow = bpf_iter_mptcp_subflow_next(&___it); + if (!subflow) break; - nr = i + 1; + next = subflow; break; } } - subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr); - if (!subflow) - return -1; - mptcp_subflow_set_scheduled(subflow, true); - ptr->last_snd = mptcp_subflow_tcp_sock(subflow); +out: + next = bpf_core_cast(next, struct mptcp_subflow_context); + mptcp_subflow_set_scheduled(next, true); + ptr->last_snd = mptcp_subflow_tcp_sock(next); return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops rr = { .init = (void *)mptcp_sched_rr_init, .release = (void *)mptcp_sched_rr_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Update test_bpf_sched(). Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 2 +- tools/testing/selftests/bpf/progs/mptcp_bpf_red.c | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ static void test_red(void) if (!ASSERT_OK_PTR(skel, "open_and_load: red")) return; - test_bpf_sched(skel->obj, "red", WITH_DATA, WITH_DATA); + test_bpf_sched(skel->maps.red, "red", WITH_DATA, WITH_DATA); mptcp_bpf_red__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_red_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - if (!bpf_mptcp_subflow_ctx_by_pos(data, i)) - break; + struct mptcp_subflow_context *subflow; - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true); - } + bpf_for_each(mptcp_subflow, subflow, msk) + mptcp_subflow_set_scheduled(subflow, true); return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops red = { .init = (void *)mptcp_sched_red_init, .release = (void *)mptcp_sched_red_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Update test_bpf_sched(). Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/prog_tests/mptcp.c | 2 +- .../selftests/bpf/progs/mptcp_bpf_burst.c | 74 ++++++++----------- 2 files changed, 33 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ static void test_burst(void) if (!ASSERT_OK_PTR(skel, "open_and_load: burst")) return; - test_bpf_sched(skel->obj, "burst", WITH_DATA, WITH_DATA); + test_bpf_sched(skel->maps.burst, "burst", WITH_DATA, WITH_DATA); mptcp_bpf_burst__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ char _license[] SEC("license") = "GPL"; #define MPTCP_SEND_BURST_SIZE 65428 +#define SSK_MODE_ACTIVE 0 +#define SSK_MODE_BACKUP 1 +#define SSK_MODE_MAX 2 + #define min(a, b) ((a) < (b) ? 
(a) : (b)) struct bpf_subflow_send_info { - __u8 subflow_id; + struct mptcp_subflow_context *subflow; __u64 linger_time; }; @@ -XXX,XX +XXX,XX @@ extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym; extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym; extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym; -#define SSK_MODE_ACTIVE 0 -#define SSK_MODE_BACKUP 1 -#define SSK_MODE_MAX 2 - static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor) { return dividend / divisor; @@ -XXX,XX +XXX,XX @@ void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk) { } -static int bpf_burst_get_send(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_send(struct mptcp_sock *msk) { struct bpf_subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < SSK_MODE_MAX; ++i) { - send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX; + send_info[i].subflow = NULL; send_info[i].linger_time = -1; } - for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - bool backup; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; - - backup = subflow->backup || subflow->request_bkup; + bpf_for_each(mptcp_subflow, subflow, msk) { + bool backup = subflow->backup || subflow->request_bkup; ssk = mptcp_subflow_tcp_sock(subflow); - if (!mptcp_subflow_active(subflow)) + if (!mptcp_subflow_active(subflow) || + !sk_stream_memory_free(ssk)) continue; nr_active += !backup; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { - send_info[backup].subflow_id = i; + send_info[backup].subflow = subflow; send_info[backup].linger_time = linger_time; } } @@ -XXX,XX 
+XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, /* pick the best backup if no other subflow is active */ if (!nr_active) - send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id; + send_info[SSK_MODE_ACTIVE].subflow = send_info[SSK_MODE_BACKUP].subflow; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id); + subflow = bpf_core_cast(send_info[SSK_MODE_ACTIVE].subflow, + struct mptcp_subflow_context); if (!subflow) return -1; ssk = mptcp_subflow_tcp_sock(subflow); - if (!ssk || !sk_stream_memory_free(ssk)) + if (!ssk) return -1; burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt); @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, return 0; } -static int bpf_burst_get_retrans(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_retrans(struct mptcp_sock *msk) { - int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id; + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - struct sock *ssk; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, msk) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; - ssk = mptcp_subflow_tcp_sock(subflow); /* still data outstanding at TCP level? 
skip this */ if (!tcp_rtx_and_write_queues_empty(ssk)) { mptcp_pm_subflow_chk_stale(msk, ssk); @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_retrans(struct mptcp_sock *msk, } if (subflow->backup || subflow->request_bkup) { - if (backup == MPTCP_SUBFLOWS_MAX) - backup = i; + if (!backup) + backup = ssk; continue; } - if (pick == MPTCP_SUBFLOWS_MAX) - pick = i; + if (!pick) + pick = ssk; } - if (pick < MPTCP_SUBFLOWS_MAX) { - subflow_id = pick; + if (pick) goto out; - } - subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; + pick = min_stale_count > 1 ? backup : NULL; out: - subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id); + if (!pick) + return -1; + subflow = bpf_mptcp_subflow_ctx(pick); if (!subflow) return -1; mptcp_subflow_set_scheduled(subflow, true); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { if (data->reinject) - return bpf_burst_get_retrans(msk, data); - return bpf_burst_get_send(msk, data); + return bpf_burst_get_retrans(msk); + return bpf_burst_get_send(msk); } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops burst = { .init = (void *)mptcp_sched_burst_init, .release = (void *)mptcp_sched_burst_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Remove mptcp_subflow_set_scheduled and bpf_mptcp_subflow_ctx_by_pos from BPF kfunc set. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { }; BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids) -BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled) -BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos) BTF_ID_FLAGS(func, mptcp_subflow_active) BTF_ID_FLAGS(func, mptcp_set_timeout) BTF_ID_FLAGS(func, mptcp_wnd_end) -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Drop this patch. bpf_mptcp_subflow_ctx_by_pos and mptcp_sched_data_set_contexts are useless now. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 8 -------- net/mptcp/protocol.h | 2 -- net/mptcp/sched.c | 22 ---------------------- 3 files changed, 32 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ __bpf_kfunc static void bpf_mptcp_sock_release(struct mptcp_sock *msk) WARN_ON_ONCE(!sk || !refcount_dec_not_one(&sk->sk_refcnt)); } -__bpf_kfunc struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) -{ - if (pos >= MPTCP_SUBFLOWS_MAX) - return NULL; - return data->contexts[pos]; -} - __bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk) { return tcp_rtx_queue_empty(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); u64 mptcp_wnd_end(const struct mptcp_sock *msk); void mptcp_set_timeout(struct sock *sk); -struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ void mptcp_release_sched(struct mptcp_sock *msk) bpf_module_put(sched, sched->owner); } -static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, - struct mptcp_sched_data *data) -{ - struct mptcp_subflow_context *subflow; - int i = 0; - - 
mptcp_for_each_subflow(msk, subflow) { - if (i == MPTCP_SUBFLOWS_MAX) { - pr_warn_once("too many subflows"); - break; - } - mptcp_subflow_set_scheduled(subflow, false); - data->contexts[i++] = subflow; - } - data->subflows = i; - - for (; i < MPTCP_SUBFLOWS_MAX; i++) - data->contexts[i] = NULL; -} - int mptcp_sched_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_send(struct mptcp_sock *msk) data.reinject = false; if (msk->sched == &mptcp_sched_default || !msk->sched) return mptcp_sched_default_get_subflow(msk, &data); - mptcp_sched_data_set_contexts(msk, &data); return msk->sched->get_subflow(msk, &data); } @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) data.reinject = true; if (msk->sched == &mptcp_sched_default || !msk->sched) return mptcp_sched_default_get_subflow(msk, &data); - mptcp_sched_data_set_contexts(msk, &data); return msk->sched->get_subflow(msk, &data); } -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> The mptcp_subflow bpf_iter is added now, it's better to use the helper bpf_for_each(mptcp_subflow) to traverse all subflows on the conn_list of an MPTCP socket and then call kfunc to modify the fields of each subflow in the WIP MPTCP BPF packet scheduler examples, instead of converting them to a fixed array. With this helper, we can get rid of this subflow array "contexts" and the size of it "subflows" in struct mptcp_sched_data. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- include/net/mptcp.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -XXX,XX +XXX,XX @@ struct mptcp_out_options { struct mptcp_sched_data { bool reinject; - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; }; struct mptcp_sched_ops { -- 2.45.2