:p
atchew
Login
From: Geliang Tang <tanggeliang@kylinos.cn> v7: - move cleanup patches out of this set. - rebased. v6: - rebased to "add mptcp_subflow bpf_iter" v10 v5: - patch 2, drop mptcp_sock_type and mptcp_subflow_type. - patch 3, revert "bpf: Export more bpf_burst related functions" - patch 4, merge "bpf: Export more bpf_burst related functions" into it. v4: - patch 2, a new cleanup for "bpf: Add bpf_mptcp_sched_ops". - patch 3 should be reverted. - patch 8, register kfunc_set. v3: - rebased. - put the "drop has_bytes_sent" squash-to patch into this set. v2: - update bpf_rr and bpf_burst With the newly added mptcp_subflow bpf_iter, we can get rid of the subflows array "contexts" in struct mptcp_sched_data. This set uses bpf_for_each(mptcp_subflow) helper to update all the bpf schedules: bpf_for_each(mptcp_subflow, subflow, msk) { ... ... mptcp_subflow_set_scheduled(subflow, true); } Geliang Tang (5): Squash to "selftests/bpf: Add bpf_bkup scheduler & test" Squash to "selftests/bpf: Add bpf_rr scheduler & test" Squash to "selftests/bpf: Add bpf_red scheduler & test" Squash to "selftests/bpf: Add bpf_burst scheduler & test" Squash to "selftests/bpf: Add bpf_first scheduler & test" tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 - .../selftests/bpf/progs/mptcp_bpf_bkup.c | 18 +---- .../selftests/bpf/progs/mptcp_bpf_burst.c | 79 ++++++++++--------- .../selftests/bpf/progs/mptcp_bpf_first.c | 10 ++- .../selftests/bpf/progs/mptcp_bpf_red.c | 10 +-- .../selftests/bpf/progs/mptcp_bpf_rr.c | 26 +++--- 6 files changed, 67 insertions(+), 79 deletions(-) -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_bkup.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_bkup_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int nr = -1; - - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - struct mptcp_subflow_context *subflow; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + struct mptcp_subflow_context *subflow; + bpf_for_each(mptcp_subflow, subflow, msk) { if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) || !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) { - nr = i; + mptcp_subflow_set_scheduled(subflow, true); break; } } - if (nr != -1) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true); - return -1; - } return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops bkup = { .init = (void *)mptcp_sched_bkup_init, .release = (void *)mptcp_sched_bkup_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_rr.c | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *next; struct mptcp_rr_storage *ptr; struct sock *last_snd = NULL; - int nr = 0; ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, return -1; last_snd = ptr->last_snd; + next = bpf_mptcp_subflow_ctx(msk->first); - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!last_snd || !subflow) + bpf_for_each(mptcp_subflow, subflow, msk) { + if (!last_snd) break; - if (mptcp_subflow_tcp_sock(subflow) == last_snd) { - if (i + 1 == MPTCP_SUBFLOWS_MAX || - !bpf_mptcp_subflow_ctx_by_pos(data, i + 1)) + if (bpf_mptcp_subflow_tcp_sock(subflow) == last_snd) { + subflow = bpf_iter_mptcp_subflow_next(&___it); + if (!subflow) break; - nr = i + 1; + next = subflow; break; } } - subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr); - if (!subflow) - return -1; - mptcp_subflow_set_scheduled(subflow, true); - ptr->last_snd = mptcp_subflow_tcp_sock(subflow); + mptcp_subflow_set_scheduled(next, true); + ptr->last_snd = bpf_mptcp_subflow_tcp_sock(next); return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops rr = { .init = (void *)mptcp_sched_rr_init, .release = (void *)mptcp_sched_rr_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf_red.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_red_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - if (!bpf_mptcp_subflow_ctx_by_pos(data, i)) - break; + struct mptcp_subflow_context *subflow; - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true); - } + bpf_for_each(mptcp_subflow, subflow, msk) + mptcp_subflow_set_scheduled(subflow, true); return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops red = { .init = (void *)mptcp_sched_red_init, .release = (void *)mptcp_sched_red_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_burst.c | 79 ++++++++++--------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ char _license[] SEC("license") = "GPL"; #define min(a, b) ((a) < (b) ? (a) : (b)) +#define SSK_MODE_ACTIVE 0 +#define SSK_MODE_BACKUP 1 +#define SSK_MODE_MAX 2 + struct bpf_subflow_send_info { __u8 subflow_id; __u64 linger_time; @@ -XXX,XX +XXX,XX @@ extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym; extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym; extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym; -#define SSK_MODE_ACTIVE 0 -#define SSK_MODE_BACKUP 1 -#define SSK_MODE_MAX 2 - static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor) { return dividend / divisor; @@ -XXX,XX +XXX,XX @@ static __always_inline bool sk_stream_memory_free(const struct sock *sk) return __sk_stream_memory_free(sk, 0); } +static struct mptcp_subflow_context * +mptcp_lookup_subflow_by_id(struct mptcp_sock *msk, unsigned int id) +{ + struct mptcp_subflow_context *subflow; + + bpf_for_each(mptcp_subflow, subflow, msk) { + if (subflow->subflow_id == id) + return subflow; + } + + return NULL; +} + SEC("struct_ops") void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk) { @@ -XXX,XX +XXX,XX @@ void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk) { } -static int bpf_burst_get_send(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_send(struct mptcp_sock *msk) { struct bpf_subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, send_info[i].linger_time = -1; } - for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - bool backup; + bpf_for_each(mptcp_subflow, subflow, msk) { + bool backup = subflow->backup || subflow->request_bkup; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; - - backup = subflow->backup || subflow->request_bkup; - - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { - send_info[backup].subflow_id = i; + send_info[backup].subflow_id = subflow->subflow_id; send_info[backup].linger_time = linger_time; } } @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, if (!nr_active) send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id); + subflow = mptcp_lookup_subflow_by_id(msk, send_info[SSK_MODE_ACTIVE].subflow_id); if (!subflow) return -1; - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!ssk || !sk_stream_memory_free(ssk)) return -1; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, return 0; } -static int bpf_burst_get_retrans(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_retrans(struct mptcp_sock *msk) { - int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id; + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - struct sock *ssk; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, msk) { + struct sock *ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; - ssk = mptcp_subflow_tcp_sock(subflow); /* still data outstanding at TCP level? skip this */ if (!tcp_rtx_and_write_queues_empty(ssk)) { mptcp_pm_subflow_chk_stale(msk, ssk); @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_retrans(struct mptcp_sock *msk, } if (subflow->backup || subflow->request_bkup) { - if (backup == MPTCP_SUBFLOWS_MAX) - backup = i; + if (!backup) + backup = ssk; continue; } - if (pick == MPTCP_SUBFLOWS_MAX) - pick = i; + if (!pick) + pick = ssk; } - if (pick < MPTCP_SUBFLOWS_MAX) { - subflow_id = pick; + if (pick) goto out; - } - subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; + pick = min_stale_count > 1 ? backup : NULL; out: - subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id); + if (!pick) + return -1; + subflow = bpf_mptcp_subflow_ctx(pick); if (!subflow) return -1; mptcp_subflow_set_scheduled(subflow, true); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { if (data->reinject) - return bpf_burst_get_retrans(msk, data); - return bpf_burst_get_send(msk, data); + return bpf_burst_get_retrans(msk); + return bpf_burst_get_send(msk); } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops burst = { .init = (void *)mptcp_sched_burst_init, .release = (void *)mptcp_sched_burst_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 --- tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 10 ++++++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -XXX,XX +XXX,XX @@ extern bool bpf_ipv6_addr_v4mapped(const struct mptcp_addr_info *a) __ksym; extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled) __ksym; -extern struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; - #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true); + struct mptcp_subflow_context *subflow; + + bpf_for_each(mptcp_subflow, subflow, msk) { + mptcp_subflow_set_scheduled(subflow, true); + break; + } + return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops first = { .init = (void *)mptcp_sched_first_init, .release = (void *)mptcp_sched_first_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> v14: - patch 1, keep mptcp_sched_data_set_contexts() helper for future use. - patch 2, use "sk = sk__ign" in bpf_mptcp_subflow_ctx() and bpf_sk_stream_memory_free(). - patch 8, drop "subflows" from struct mptcp_sched_data too. Depends on: - Squash to "Add mptcp_subflow bpf_iter support", v2 Based-on: <cover.1738470660.git.tanggeliang@kylinos.cn> v13: - use '__ign' suffix to ignore the argument type checks of bpf_mptcp_subflow_ctx() and bpf_sk_stream_memory_free(), instead of adding a new helper bpf_mptcp_send_info_to_ssk(). - use 'bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk)' instead of using 'bpf_for_each(mptcp_subflow, subflow, msk)'. - keep struct mptcp_sched_data for future use. v12: - drop struct mptcp_sched_data. - rebased on "split get_subflow interface into two" v2. v11: If another squash-to patchset (Squash to "Add mptcp_subflow bpf_iter support") under review is merged before this set, v10 will fail to run. v11 fixes this issue and can run regardless of whether it is merged before or after the squash-to patchset. Compared with v10, only patches 3, 5, and 8 have been modified: - use mptcp_subflow_tcp_sock instead of bpf_mptcp_subflow_tcp_sock in patch 3 and patch 5. - drop bpf_mptcp_sched_kfunc_set, use bpf_mptcp_common_kfunc_set instead in patch 8. v10: - drop mptcp_subflow_set_scheduled() helper and WRITE_ONCE() in BPF. - add new bpf helper bpf_mptcp_send_info_to_ssk() for burst scheduler. v9: - merge 'Fixes for "use bpf_iter in bpf schedulers" v8' into this set. - rebased on "add netns helpers" v4 v8: - address Mat's comments in v7. - move sk_stream_memory_free check inside bpf_for_each() loop. - implement mptcp_subflow_set_scheduled helper in BPF. - add cleanup patches into this set again. v7: - move cleanup patches out of this set. - rebased. v6: - rebased to "add mptcp_subflow bpf_iter" v10 v5: - patch 2, drop mptcp_sock_type and mptcp_subflow_type. - patch 3, revert "bpf: Export more bpf_burst related functions" - patch 4, merge "bpf: Export more bpf_burst related functions" into it. v4: - patch 2, a new cleanup for "bpf: Add bpf_mptcp_sched_ops". - patch 3 should be reverted. - patch 8, register kfunc_set. v3: - rebased. - put the "drop has_bytes_sent" squash-to patch into this set. v2: - update bpf_rr and bpf_burst With the newly added mptcp_subflow bpf_iter, we can get rid of the subflows array "contexts" in struct mptcp_sched_data. This set uses bpf_for_each(mptcp_subflow) helper to update all the bpf schedules: bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) { ... ... mptcp_subflow_set_scheduled(subflow, true); } Geliang Tang (8): Squash to "mptcp: add sched_data helpers" Squash to "bpf: Export mptcp packet scheduler helpers" Squash to "selftests/bpf: Add bpf_first scheduler & test" Squash to "selftests/bpf: Add bpf_bkup scheduler & test" Squash to "selftests/bpf: Add bpf_rr scheduler & test" Squash to "selftests/bpf: Add bpf_red scheduler & test" Squash to "selftests/bpf: Add bpf_burst scheduler & test" mptcp: drop subflow contexts in mptcp_sched_data include/net/mptcp.h | 4 - net/mptcp/bpf.c | 50 ++++++------ net/mptcp/protocol.h | 2 - net/mptcp/sched.c | 15 ---- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 - .../selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +--- .../selftests/bpf/progs/mptcp_bpf_burst.c | 78 +++++++------------ .../selftests/bpf/progs/mptcp_bpf_first.c | 8 +- .../selftests/bpf/progs/mptcp_bpf_red.c | 8 +- .../selftests/bpf/progs/mptcp_bpf_rr.c | 31 ++++---- 10 files changed, 81 insertions(+), 134 deletions(-) -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Drop bpf_mptcp_subflow_ctx_by_pos. Keep mptcp_sched_data_set_contexts for future use. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 8 -------- net/mptcp/protocol.h | 2 -- net/mptcp/sched.c | 15 --------------- 3 files changed, 25 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) { } -__bpf_kfunc struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) -{ - if (pos >= MPTCP_SUBFLOWS_MAX) - return NULL; - return data->contexts[pos]; -} - __bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk) { return tcp_rtx_queue_empty(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); u64 mptcp_wnd_end(const struct mptcp_sock *msk); void mptcp_set_timeout(struct sock *sk); -struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; - int i = 0; - - mptcp_for_each_subflow(msk, subflow) { - if (i == MPTCP_SUBFLOWS_MAX) { - pr_warn_once("too many subflows"); - break; - } - mptcp_subflow_set_scheduled(subflow, false); - data->contexts[i++] = subflow; - } - data->subflows = i; - - for (; i < MPTCP_SUBFLOWS_MAX; i++) - data->contexts[i] = NULL; } int mptcp_sched_get_send(struct mptcp_sock *msk) -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Instead of adding a new BPF function bpf_mptcp_send_info_to_ssk() in v12, this patch uses a much more simpler approach, which using '__ign' suffix for the argument of bpf_mptcp_subflow_ctx() to let BPF to ignore the type check of this argument. Remove bpf_mptcp_subflow_ctx_by_pos from BPF kfunc set. Drop bpf_mptcp_sched_kfunc_set, use bpf_mptcp_common_kfunc_set instead. Add new helpers bpf_mptcp_subflow_tcp_sock() and bpf_sk_stream_memory_free(). Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ struct bpf_iter_mptcp_subflow_kern { __bpf_kfunc_start_defs(); __bpf_kfunc static struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx(const struct sock *sk) +bpf_mptcp_subflow_ctx(const struct sock *sk__ign) { + const struct sock *sk = sk__ign; + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) return mptcp_subflow_ctx(sk); @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_ctx(const struct sock *sk) return NULL; } +__bpf_kfunc static struct sock * +bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) +{ + if (!subflow) + return NULL; + + return mptcp_subflow_tcp_sock(subflow); +} + __bpf_kfunc static int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, struct sock *sk) @@ -XXX,XX +XXX,XX @@ __bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk) return tcp_rtx_queue_empty(sk); } +__bpf_kfunc static bool bpf_sk_stream_memory_free(const struct sock *sk__ign) +{ + const struct sock *sk = sk__ign; + + if (sk && sk_fullsock(sk) && + sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + return sk_stream_memory_free(sk); + + return NULL; +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) -BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) - -static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { - .owner = THIS_MODULE, - .set = &bpf_mptcp_common_kfunc_ids, -}; - -BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids) BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled) -BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos) BTF_ID_FLAGS(func, mptcp_subflow_active) BTF_ID_FLAGS(func, mptcp_set_timeout) BTF_ID_FLAGS(func, mptcp_wnd_end) -BTF_ID_FLAGS(func, tcp_stream_memory_free) +BTF_ID_FLAGS(func, bpf_sk_stream_memory_free, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_mptcp_subflow_queues_empty) BTF_ID_FLAGS(func, mptcp_pm_subflow_chk_stale, KF_SLEEPABLE) -BTF_KFUNCS_END(bpf_mptcp_sched_kfunc_ids) +BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) -static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = { +static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { .owner = THIS_MODULE, - .set = &bpf_mptcp_sched_kfunc_ids, + .set = &bpf_mptcp_common_kfunc_ids, }; static int __init bpf_mptcp_kfunc_init(void) @@ -XXX,XX +XXX,XX @@ static int __init bpf_mptcp_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCKOPT, &bpf_mptcp_common_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, - &bpf_mptcp_sched_kfunc_set); + &bpf_mptcp_common_kfunc_set); #ifdef CONFIG_BPF_JIT ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops); #endif -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Drop bpf_mptcp_subflow_ctx_by_pos declaration. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 --- tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 8 +++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym; extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled) __ksym; -extern struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; - #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_first_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true); + struct mptcp_subflow_context *subflow; + + subflow = bpf_mptcp_subflow_ctx(msk->first); + if (!subflow) + return -1; + + mptcp_subflow_set_scheduled(subflow, true); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_bkup_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int nr = -1; - - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - struct mptcp_subflow_context *subflow; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + struct mptcp_subflow_context *subflow; + bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) { if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) || !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) { - nr = i; + mptcp_subflow_set_scheduled(subflow, true); break; } } - if (nr != -1) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true); - return -1; - } return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_rr.c | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_rr_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *next; struct mptcp_rr_storage *ptr; - struct sock *last_snd = NULL; - int nr = 0; ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) return -1; - last_snd = ptr->last_snd; + next = bpf_mptcp_subflow_ctx(msk->first); + if (!next) + return -1; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!last_snd || !subflow) - break; + if (!ptr->last_snd) + goto out; - if (mptcp_subflow_tcp_sock(subflow) == last_snd) { - if (i + 1 == MPTCP_SUBFLOWS_MAX || - !bpf_mptcp_subflow_ctx_by_pos(data, i + 1)) + bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) { + if (mptcp_subflow_tcp_sock(subflow) == ptr->last_snd) { + subflow = bpf_iter_mptcp_subflow_next(&___it); + if (!subflow) break; - nr = i + 1; + next = subflow; break; } } - subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr); - if (!subflow) - return -1; - mptcp_subflow_set_scheduled(subflow, true); - ptr->last_snd = mptcp_subflow_tcp_sock(subflow); +out: + mptcp_subflow_set_scheduled(next, true); + ptr->last_snd = mptcp_subflow_tcp_sock(next); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf_red.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_red_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - if (!bpf_mptcp_subflow_ctx_by_pos(data, i)) - break; + struct mptcp_subflow_context *subflow; - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true); - } + bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) + mptcp_subflow_set_scheduled(subflow, true); return 0; } -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Drop bpf_subflow_send_info, use subflow_send_info instead. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_burst.c | 78 +++++++------------ 1 file changed, 26 insertions(+), 52 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ char _license[] SEC("license") = "GPL"; #define min(a, b) ((a) < (b) ? (a) : (b)) -struct bpf_subflow_send_info { - __u8 subflow_id; - __u64 linger_time; -}; - extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym; extern void mptcp_set_timeout(struct sock *sk) __ksym; extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym; -extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym; +extern bool bpf_sk_stream_memory_free(const struct sock *sk) __ksym; extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym; extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym; @@ -XXX,XX +XXX,XX @@ static __always_inline bool tcp_rtx_and_write_queues_empty(struct sock *sk) return bpf_mptcp_subflow_queues_empty(sk) && tcp_write_queue_empty(sk); } -static __always_inline bool __sk_stream_memory_free(const struct sock *sk, int wake) -{ - if (sk->sk_wmem_queued >= sk->sk_sndbuf) - return false; - - return tcp_stream_memory_free(sk, wake); -} - -static __always_inline bool sk_stream_memory_free(const struct sock *sk) -{ - return __sk_stream_memory_free(sk, 0); -} - SEC("struct_ops") void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk) { @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct bpf_subflow_send_info send_info[SSK_MODE_MAX]; + struct subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; __u32 pace, burst, wmem; @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk, /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < SSK_MODE_MAX; ++i) { - send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX; + send_info[i].ssk = NULL; send_info[i].linger_time = -1; } - for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - bool backup; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; - - backup = subflow->backup || subflow->request_bkup; + bpf_for_each(mptcp_subflow, subflow, sk) { + bool backup = subflow->backup || subflow->request_bkup; ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk, linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { - send_info[backup].subflow_id = i; + send_info[backup].ssk = ssk; send_info[backup].linger_time = linger_time; } } @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk, /* pick the best backup if no other subflow is active */ if (!nr_active) - send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id; + send_info[SSK_MODE_ACTIVE].ssk = send_info[SSK_MODE_BACKUP].ssk; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id); - if (!subflow) + ssk = send_info[SSK_MODE_ACTIVE].ssk; + if (!ssk || !bpf_sk_stream_memory_free(ssk)) return -1; - ssk = mptcp_subflow_tcp_sock(subflow); - if (!ssk || !sk_stream_memory_free(ssk)) + + subflow = bpf_mptcp_subflow_ctx(ssk); + if (!subflow) return -1; burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt); + ssk = bpf_core_cast(ssk, struct sock); wmem = ssk->sk_wmem_queued; if (!burst) goto out; @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_burst_get_retrans, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id; + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - struct sock *ssk; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) { + struct sock *ssk = bpf_mptcp_subflow_tcp_sock(subflow); - if (!mptcp_subflow_active(subflow)) + if (!ssk || !mptcp_subflow_active(subflow)) continue; - ssk = mptcp_subflow_tcp_sock(subflow); /* still data outstanding at TCP level? skip this */ if (!tcp_rtx_and_write_queues_empty(ssk)) { mptcp_pm_subflow_chk_stale(msk, ssk); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_retrans, struct mptcp_sock *msk, } if (subflow->backup || subflow->request_bkup) { - if (backup == MPTCP_SUBFLOWS_MAX) - backup = i; + if (!backup) + backup = ssk; continue; } - if (pick == MPTCP_SUBFLOWS_MAX) - pick = i; + if (!pick) + pick = ssk; } - if (pick < MPTCP_SUBFLOWS_MAX) { - subflow_id = pick; + if (pick) goto out; - } - subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; + pick = min_stale_count > 1 ? backup : NULL; out: - subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id); + if (!pick) + return -1; + subflow = bpf_mptcp_subflow_ctx(pick); if (!subflow) return -1; mptcp_subflow_set_scheduled(subflow, true); -- 2.43.0
From: Geliang Tang <tanggeliang@kylinos.cn> The mptcp_subflow bpf_iter is added now, it's better to use the helper bpf_for_each(mptcp_subflow) to traverse all subflows on the conn_list of an MPTCP socket and then call kfunc to modify the fields of each subflow in the WIP MPTCP BPF packet scheduler examples, instead of converting them to a fixed array. With this helper, we can get rid of this subflow array "contexts" and the size of it "subflows" in struct mptcp_sched_data. And keep struct mptcp_sched_data for future use. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- include/net/mptcp.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -XXX,XX +XXX,XX @@ struct mptcp_out_options { #define MPTCP_SCHED_MAX 128 #define MPTCP_SCHED_BUF_MAX (MPTCP_SCHED_NAME_MAX * MPTCP_SCHED_MAX) -#define MPTCP_SUBFLOWS_MAX 8 - struct mptcp_sched_data { - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; }; struct mptcp_sched_ops { -- 2.43.0