:p
atchew
Login
From: Geliang Tang <tanggeliang@kylinos.cn> v7: - move cleanup patches out of this set. - rebased. v6: - rebased to "add mptcp_subflow bpf_iter" v10 v5: - patch 2, drop mptcp_sock_type and mptcp_subflow_type. - patch 3, revert "bpf: Export more bpf_burst related functions" - patch 4, merge "bpf: Export more bpf_burst related functions" into it. v4: - patch 2, a new cleanup for "bpf: Add bpf_mptcp_sched_ops". - patch 3 should be reverted. - patch 8, register kfunc_set. v3: - rebased. - put the "drop has_bytes_sent" squash-to patch into this set. v2: - update bpf_rr and bpf_burst With the newly added mptcp_subflow bpf_iter, we can get rid of the subflows array "contexts" in struct mptcp_sched_data. This set uses bpf_for_each(mptcp_subflow) helper to update all the bpf schedules: bpf_for_each(mptcp_subflow, subflow, msk) { ... ... mptcp_subflow_set_scheduled(subflow, true); } Geliang Tang (5): Squash to "selftests/bpf: Add bpf_bkup scheduler & test" Squash to "selftests/bpf: Add bpf_rr scheduler & test" Squash to "selftests/bpf: Add bpf_red scheduler & test" Squash to "selftests/bpf: Add bpf_burst scheduler & test" Squash to "selftests/bpf: Add bpf_first scheduler & test" tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 - .../selftests/bpf/progs/mptcp_bpf_bkup.c | 18 +---- .../selftests/bpf/progs/mptcp_bpf_burst.c | 79 ++++++++++--------- .../selftests/bpf/progs/mptcp_bpf_first.c | 10 ++- .../selftests/bpf/progs/mptcp_bpf_red.c | 10 +-- .../selftests/bpf/progs/mptcp_bpf_rr.c | 26 +++--- 6 files changed, 67 insertions(+), 79 deletions(-) -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_bkup.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_bkup_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int nr = -1; - - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - struct mptcp_subflow_context *subflow; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + struct mptcp_subflow_context *subflow; + bpf_for_each(mptcp_subflow, subflow, msk) { if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) || !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) { - nr = i; + mptcp_subflow_set_scheduled(subflow, true); break; } } - if (nr != -1) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true); - return -1; - } return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops bkup = { .init = (void *)mptcp_sched_bkup_init, .release = (void *)mptcp_sched_bkup_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_rr.c | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *next; struct mptcp_rr_storage *ptr; struct sock *last_snd = NULL; - int nr = 0; ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, return -1; last_snd = ptr->last_snd; + next = bpf_mptcp_subflow_ctx(msk->first); - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!last_snd || !subflow) + bpf_for_each(mptcp_subflow, subflow, msk) { + if (!last_snd) break; - if (mptcp_subflow_tcp_sock(subflow) == last_snd) { - if (i + 1 == MPTCP_SUBFLOWS_MAX || - !bpf_mptcp_subflow_ctx_by_pos(data, i + 1)) + if (bpf_mptcp_subflow_tcp_sock(subflow) == last_snd) { + subflow = bpf_iter_mptcp_subflow_next(&___it); + if (!subflow) break; - nr = i + 1; + next = subflow; break; } } - subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr); - if (!subflow) - return -1; - mptcp_subflow_set_scheduled(subflow, true); - ptr->last_snd = mptcp_subflow_tcp_sock(subflow); + mptcp_subflow_set_scheduled(next, true); + ptr->last_snd = bpf_mptcp_subflow_tcp_sock(next); return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops rr = { .init = (void *)mptcp_sched_rr_init, .release = (void *)mptcp_sched_rr_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf_red.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_red_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - if (!bpf_mptcp_subflow_ctx_by_pos(data, i)) - break; + struct mptcp_subflow_context *subflow; - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true); - } + bpf_for_each(mptcp_subflow, subflow, msk) + mptcp_subflow_set_scheduled(subflow, true); return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops red = { .init = (void *)mptcp_sched_red_init, .release = (void *)mptcp_sched_red_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_burst.c | 79 ++++++++++--------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ char _license[] SEC("license") = "GPL"; #define min(a, b) ((a) < (b) ? (a) : (b)) +#define SSK_MODE_ACTIVE 0 +#define SSK_MODE_BACKUP 1 +#define SSK_MODE_MAX 2 + struct bpf_subflow_send_info { __u8 subflow_id; __u64 linger_time; @@ -XXX,XX +XXX,XX @@ extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym; extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym; extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym; -#define SSK_MODE_ACTIVE 0 -#define SSK_MODE_BACKUP 1 -#define SSK_MODE_MAX 2 - static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor) { return dividend / divisor; @@ -XXX,XX +XXX,XX @@ static __always_inline bool sk_stream_memory_free(const struct sock *sk) return __sk_stream_memory_free(sk, 0); } +static struct mptcp_subflow_context * +mptcp_lookup_subflow_by_id(struct mptcp_sock *msk, unsigned int id) +{ + struct mptcp_subflow_context *subflow; + + bpf_for_each(mptcp_subflow, subflow, msk) { + if (subflow->subflow_id == id) + return subflow; + } + + return NULL; +} + SEC("struct_ops") void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk) { @@ -XXX,XX +XXX,XX @@ void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk) { } -static int bpf_burst_get_send(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_send(struct mptcp_sock *msk) { struct bpf_subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, send_info[i].linger_time = -1; } - for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - bool backup; + bpf_for_each(mptcp_subflow, subflow, msk) { + bool backup = subflow->backup || subflow->request_bkup; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; - - backup = subflow->backup || subflow->request_bkup; - - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { - send_info[backup].subflow_id = i; + send_info[backup].subflow_id = subflow->subflow_id; send_info[backup].linger_time = linger_time; } } @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, if (!nr_active) send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id); + subflow = mptcp_lookup_subflow_by_id(msk, send_info[SSK_MODE_ACTIVE].subflow_id); if (!subflow) return -1; - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!ssk || !sk_stream_memory_free(ssk)) return -1; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, return 0; } -static int bpf_burst_get_retrans(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int bpf_burst_get_retrans(struct mptcp_sock *msk) { - int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id; + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - struct sock *ssk; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, msk) { + struct sock *ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; - ssk = mptcp_subflow_tcp_sock(subflow); /* still data outstanding at TCP level? skip this */ if (!tcp_rtx_and_write_queues_empty(ssk)) { mptcp_pm_subflow_chk_stale(msk, ssk); @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_retrans(struct mptcp_sock *msk, } if (subflow->backup || subflow->request_bkup) { - if (backup == MPTCP_SUBFLOWS_MAX) - backup = i; + if (!backup) + backup = ssk; continue; } - if (pick == MPTCP_SUBFLOWS_MAX) - pick = i; + if (!pick) + pick = ssk; } - if (pick < MPTCP_SUBFLOWS_MAX) { - subflow_id = pick; + if (pick) goto out; - } - subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; + pick = min_stale_count > 1 ? backup : NULL; out: - subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id); + if (!pick) + return -1; + subflow = bpf_mptcp_subflow_ctx(pick); if (!subflow) return -1; mptcp_subflow_set_scheduled(subflow, true); @@ -XXX,XX +XXX,XX @@ int BPF_PROG(bpf_burst_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { if (data->reinject) - return bpf_burst_get_retrans(msk, data); - return bpf_burst_get_send(msk, data); + return bpf_burst_get_retrans(msk); + return bpf_burst_get_send(msk); } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops burst = { .init = (void *)mptcp_sched_burst_init, .release = (void *)mptcp_sched_burst_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 --- tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 10 ++++++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -XXX,XX +XXX,XX @@ extern bool bpf_ipv6_addr_v4mapped(const struct mptcp_addr_info *a) __ksym; extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled) __ksym; -extern struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; - #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true); + struct mptcp_subflow_context *subflow; + + bpf_for_each(mptcp_subflow, subflow, msk) { + mptcp_subflow_set_scheduled(subflow, true); + break; + } + return 0; } -SEC(".struct_ops") +SEC(".struct_ops.link") struct mptcp_sched_ops first = { .init = (void *)mptcp_sched_first_init, .release = (void *)mptcp_sched_first_release, -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> v10: - drop mptcp_subflow_set_scheduled() helper and WRITE_ONCE() in BPF. - add new bpf helper bpf_mptcp_send_info_to_ssk() for burst scheduler. v9: - merge 'Fixes for "use bpf_iter in bpf schedulers" v8' into this set. - rebased on "add netns helpers" v4 v8: - address Mat's comments in v7. - move sk_stream_memory_free check inside bpf_for_each() loop. - implement mptcp_subflow_set_scheduled helper in BPF. - add cleanup patches into this set again. v7: - move cleanup patches out of this set. - rebased. v6: - rebased to "add mptcp_subflow bpf_iter" v10 v5: - patch 2, drop mptcp_sock_type and mptcp_subflow_type. - patch 3, revert "bpf: Export more bpf_burst related functions" - patch 4, merge "bpf: Export more bpf_burst related functions" into it. v4: - patch 2, a new cleanup for "bpf: Add bpf_mptcp_sched_ops". - patch 3 should be reverted. - patch 8, register kfunc_set. v3: - rebased. - put the "drop has_bytes_sent" squash-to patch into this set. v2: - update bpf_rr and bpf_burst With the newly added mptcp_subflow bpf_iter, we can get rid of the subflows array "contexts" in struct mptcp_sched_data. This set uses bpf_for_each(mptcp_subflow) helper to update all the bpf schedules: bpf_for_each(mptcp_subflow, subflow, msk) { ... ... mptcp_subflow_set_scheduled(subflow, true); } Geliang Tang (9): bpf: Add bpf_mptcp_send_info_to_ssk Squash to "selftests/bpf: Add bpf_bkup scheduler & test" Squash to "selftests/bpf: Add bpf_rr scheduler & test" Squash to "selftests/bpf: Add bpf_red scheduler & test" Squash to "selftests/bpf: Add bpf_burst scheduler & test" Squash to "selftests/bpf: Add bpf_first scheduler & test" Revert "mptcp: add sched_data helpers" Squash to "bpf: Export mptcp packet scheduler helpers" mptcp: drop subflow contexts in mptcp_sched_data include/net/mptcp.h | 2 - include/uapi/linux/bpf.h | 7 +++ net/mptcp/bpf.c | 31 +++++++--- net/mptcp/protocol.c | 5 -- net/mptcp/protocol.h | 7 ++- net/mptcp/sched.c | 22 ------- tools/include/uapi/linux/bpf.h | 7 +++ tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 - .../selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +---- .../selftests/bpf/progs/mptcp_bpf_burst.c | 62 +++++++------------ .../selftests/bpf/progs/mptcp_bpf_first.c | 8 ++- .../selftests/bpf/progs/mptcp_bpf_red.c | 8 +-- .../selftests/bpf/progs/mptcp_bpf_rr.c | 31 +++++----- 13 files changed, 92 insertions(+), 117 deletions(-) -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Burst scheduler needs to allocate an array of struct subflow_send_info on the stack and then select a subflow to send data. In order to implement burst scheduler in BPF, this patch adds a new bpf_mptcp_send_info_to_ssk() helper to get ssk from subflow_send_info and sets its parameter type as ARG_PTR_TO_STACK. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- include/uapi/linux/bpf.h | 7 +++++++ net/mptcp/bpf.c | 22 ++++++++++++++++++++++ net/mptcp/protocol.c | 5 ----- net/mptcp/protocol.h | 5 +++++ tools/include/uapi/linux/bpf.h | 7 +++++++ 5 files changed, 41 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index XXXXXXX..XXXXXXX 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -XXX,XX +XXX,XX @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * void *bpf_mptcp_send_info_to_ssk(void *info) + * Description + * Dynamically cast a *info* pointer to a *sock* pointer. + * Return + * *info* if casting is valid, or **NULL** otherwise. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -XXX,XX +XXX,XX @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(mptcp_send_info_to_ssk, 212, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ static struct bpf_struct_ops bpf_mptcp_sched_ops; static const struct btf_type *mptcp_sock_type, *mptcp_subflow_type __read_mostly; static u32 mptcp_sock_id, mptcp_subflow_id; +BPF_CALL_1(bpf_mptcp_send_info_to_ssk, struct subflow_send_info *, info) +{ + BTF_TYPE_EMIT(struct sock); + + if (info && info->ssk && sk_fullsock(info->ssk) && + info->ssk->sk_protocol == IPPROTO_TCP && + sk_is_mptcp(info->ssk)) + return (unsigned long)info->ssk; + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_mptcp_send_info_to_ssk_proto = { + .func = bpf_mptcp_send_info_to_ssk, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_STACK, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], +}; + static const struct bpf_func_proto * bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -XXX,XX +XXX,XX @@ bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id, return &bpf_skc_to_tcp6_sock_proto; case BPF_FUNC_skc_to_tcp_sock: return &bpf_skc_to_tcp_sock_proto; + case BPF_FUNC_mptcp_send_info_to_ssk: + return &bpf_mptcp_send_info_to_ssk_proto; default: return bpf_base_func_proto(func_id, prog); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, sizeof(struct ipv6hdr) - \ sizeof(struct frag_hdr)) -struct subflow_send_info { - struct sock *ssk; - u64 linger_time; -}; - void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow) { if (!subflow->stale) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) WRITE_ONCE(subflow->local_id, -1); } +struct subflow_send_info { + struct sock *ssk; + u64 linger_time; +}; + /* Convert reset reasons in MPTCP to enum sk_rst_reason type */ static inline enum sk_rst_reason sk_rst_convert_mptcp_reason(u32 reason) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -XXX,XX +XXX,XX @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * void *bpf_mptcp_send_info_to_ssk(void *info) + * Description + * Dynamically cast a *info* pointer to a *sock* pointer. + * Return + * *info* if casting is valid, or **NULL** otherwise. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -XXX,XX +XXX,XX @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(mptcp_send_info_to_ssk, 212, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../testing/selftests/bpf/progs/mptcp_bpf_bkup.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_bkup_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int nr = -1; - - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - struct mptcp_subflow_context *subflow; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + struct mptcp_subflow_context *subflow; + bpf_for_each(mptcp_subflow, subflow, msk) { if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) || !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) { - nr = i; + mptcp_subflow_set_scheduled(subflow, true); break; } } - if (nr != -1) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true); - return -1; - } return 0; } -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_rr.c | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *next; struct mptcp_rr_storage *ptr; - struct sock *last_snd = NULL; - int nr = 0; ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) return -1; - last_snd = ptr->last_snd; + next = bpf_mptcp_subflow_ctx(msk->first); + if (!next) + return -1; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!last_snd || !subflow) - break; + if (!ptr->last_snd) + goto out; - if (mptcp_subflow_tcp_sock(subflow) == last_snd) { - if (i + 1 == MPTCP_SUBFLOWS_MAX || - !bpf_mptcp_subflow_ctx_by_pos(data, i + 1)) + bpf_for_each(mptcp_subflow, subflow, msk) { + if (mptcp_subflow_tcp_sock(subflow) == ptr->last_snd) { + subflow = bpf_iter_mptcp_subflow_next(&___it); + if (!subflow) break; - nr = i + 1; + next = subflow; break; } } - subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr); - if (!subflow) - return -1; - mptcp_subflow_set_scheduled(subflow, true); - ptr->last_snd = mptcp_subflow_tcp_sock(subflow); +out: + mptcp_subflow_set_scheduled(next, true); + ptr->last_snd = bpf_mptcp_subflow_tcp_sock(next); return 0; } -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf_red.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_red_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - if (!bpf_mptcp_subflow_ctx_by_pos(data, i)) - break; + struct mptcp_subflow_context *subflow; - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true); - } + bpf_for_each(mptcp_subflow, subflow, msk) + mptcp_subflow_set_scheduled(subflow, true); return 0; } -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Use bpf_mptcp_send_info_to_ssk() helper. Drop bpf_subflow_send_info, use subflow_send_info instead. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- .../selftests/bpf/progs/mptcp_bpf_burst.c | 62 +++++++------------ 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ char _license[] SEC("license") = "GPL"; #define min(a, b) ((a) < (b) ? (a) : (b)) -struct bpf_subflow_send_info { - __u8 subflow_id; - __u64 linger_time; -}; - extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym; extern void mptcp_set_timeout(struct sock *sk) __ksym; extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym; @@ -XXX,XX +XXX,XX @@ void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk) static int bpf_burst_get_send(struct mptcp_sock *msk, struct mptcp_sched_data *data) { - struct bpf_subflow_send_info send_info[SSK_MODE_MAX]; + struct subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; __u32 pace, burst, wmem; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < SSK_MODE_MAX; ++i) { - send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX; + send_info[i].ssk = NULL; send_info[i].linger_time = -1; } - for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - bool backup; - - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, msk) { + bool backup = subflow->backup || subflow->request_bkup; - backup = subflow->backup || subflow->request_bkup; - - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { - send_info[backup].subflow_id = i; + send_info[backup].ssk = ssk; send_info[backup].linger_time = linger_time; } } @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, /* pick the best backup if no other subflow is active */ if (!nr_active) - send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id; + send_info[SSK_MODE_ACTIVE].ssk = send_info[SSK_MODE_BACKUP].ssk; - subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id); - if (!subflow) - return -1; - ssk = mptcp_subflow_tcp_sock(subflow); + ssk = bpf_mptcp_send_info_to_ssk(&send_info[SSK_MODE_ACTIVE]); if (!ssk || !sk_stream_memory_free(ssk)) return -1; + subflow = bpf_mptcp_subflow_ctx(ssk); + if (!subflow) + return -1; + burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt); wmem = ssk->sk_wmem_queued; if (!burst) @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, static int bpf_burst_get_retrans(struct mptcp_sock *msk, struct mptcp_sched_data *data) { - int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id; + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - struct sock *ssk; - for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { - subflow = bpf_mptcp_subflow_ctx_by_pos(data, i); - if (!subflow) - break; + bpf_for_each(mptcp_subflow, subflow, msk) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; - ssk = mptcp_subflow_tcp_sock(subflow); /* still data outstanding at TCP level? skip this */ if (!tcp_rtx_and_write_queues_empty(ssk)) { mptcp_pm_subflow_chk_stale(msk, ssk); @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_retrans(struct mptcp_sock *msk, } if (subflow->backup || subflow->request_bkup) { - if (backup == MPTCP_SUBFLOWS_MAX) - backup = i; + if (!backup) + backup = ssk; continue; } - if (pick == MPTCP_SUBFLOWS_MAX) - pick = i; + if (!pick) + pick = ssk; } - if (pick < MPTCP_SUBFLOWS_MAX) { - subflow_id = pick; + if (pick) goto out; - } - subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; + pick = min_stale_count > 1 ? backup : NULL; out: - subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id); + if (!pick) + return -1; + subflow = bpf_mptcp_subflow_ctx(pick); if (!subflow) return -1; mptcp_subflow_set_scheduled(subflow, true); -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Use the newly added bpf_for_each() helper to walk the conn_list. Drop bpf_mptcp_subflow_ctx_by_pos declaration. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 --- tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 8 +++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -XXX,XX +XXX,XX @@ bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym; extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled) __ksym; -extern struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; - #endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c @@ -XXX,XX +XXX,XX @@ SEC("struct_ops") int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { - mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true); + struct mptcp_subflow_context *subflow; + + subflow = bpf_mptcp_subflow_ctx(msk->first); + if (!subflow) + return -1; + + mptcp_subflow_set_scheduled(subflow, true); return 0; } -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Drop this patch. bpf_mptcp_subflow_ctx_by_pos and mptcp_sched_data_set_contexts are uesless now. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 8 -------- net/mptcp/protocol.h | 2 -- net/mptcp/sched.c | 22 ---------------------- 3 files changed, 32 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ __bpf_kfunc static void bpf_mptcp_sock_release(struct mptcp_sock *msk) WARN_ON_ONCE(!sk || !refcount_dec_not_one(&sk->sk_refcnt)); } -__bpf_kfunc struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) -{ - if (pos >= MPTCP_SUBFLOWS_MAX) - return NULL; - return data->contexts[pos]; -} - __bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk) { return tcp_rtx_queue_empty(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); u64 mptcp_wnd_end(const struct mptcp_sock *msk); void mptcp_set_timeout(struct sock *sk); -struct mptcp_subflow_context * -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, WRITE_ONCE(subflow->scheduled, scheduled); } -static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, - struct mptcp_sched_data *data) -{ - struct mptcp_subflow_context *subflow; - int i = 0; - - mptcp_for_each_subflow(msk, subflow) { - if (i == MPTCP_SUBFLOWS_MAX) { - pr_warn_once("too many subflows"); - break; - } - mptcp_subflow_set_scheduled(subflow, false); - data->contexts[i++] = subflow; - } - data->subflows = i; - - for (; i < MPTCP_SUBFLOWS_MAX; i++) - data->contexts[i] = NULL; -} - int mptcp_sched_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_send(struct mptcp_sock *msk) data.reinject = false; if (msk->sched == &mptcp_sched_default || !msk->sched) return mptcp_sched_default_get_subflow(msk, &data); - mptcp_sched_data_set_contexts(msk, &data); return msk->sched->get_subflow(msk, &data); } @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) data.reinject = true; if (msk->sched == &mptcp_sched_default || !msk->sched) return mptcp_sched_default_get_subflow(msk, &data); - mptcp_sched_data_set_contexts(msk, &data); return msk->sched->get_subflow(msk, &data); } -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> Remove bpf_mptcp_subflow_ctx_by_pos from BPF kfunc set. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- net/mptcp/bpf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ static const struct btf_kfunc_id_set bpf_mptcp_common_kfunc_set = { BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids) BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled) -BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos) BTF_ID_FLAGS(func, mptcp_subflow_active) BTF_ID_FLAGS(func, mptcp_set_timeout) BTF_ID_FLAGS(func, mptcp_wnd_end) -- 2.45.2
From: Geliang Tang <tanggeliang@kylinos.cn> The mptcp_subflow bpf_iter is added now, it's better to use the helper bpf_for_each(mptcp_subflow) to traverse all subflows on the conn_list of an MPTCP socket and then call kfunc to modify the fields of each subflow in the WIP MPTCP BPF packet scheduler examples, instead of converting them to a fixed array. With this helper, we can get rid of this subflow array "contexts" and the size of it "subflows" in struct mptcp_sched_data. Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> --- include/net/mptcp.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -XXX,XX +XXX,XX @@ struct mptcp_out_options { struct mptcp_sched_data { bool reinject; - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; }; struct mptcp_sched_ops { -- 2.45.2