:p
atchew
Login
There's a bug in bpf_burst. snd_burst stored in mptcp_burst_storage in BPF context is not used. msk->snd_burst is still used in kernel space. To fix this, add two new interfaces in mptcp_sched_ops to get and set scheduler's paramters from BPF context to kernel space. Geliang Tang (11): Squash to "mptcp: add struct mptcp_sched_ops" Squash to "mptcp: add scheduler wrappers" mptcp: use snd_burst wrappers Squash to "mptcp: register default scheduler" Squash to "bpf: Add bpf_mptcp_sched_ops" Squash to "selftests/bpf: Add mptcp sched structs" Squash to "selftests/bpf: Add bpf_bkup scheduler" Squash to "selftests/bpf: Add bpf_rr scheduler" Squash to "selftests/bpf: Add bpf_burst scheduler" selftests/bpf: Add bpf_stale scheduler selftests/bpf: Add bpf_stale test include/net/mptcp.h | 10 ++ net/mptcp/bpf.c | 27 ++- net/mptcp/protocol.c | 8 +- net/mptcp/protocol.h | 2 + net/mptcp/sched.c | 49 ++++++ tools/testing/selftests/bpf/bpf_tcp_helpers.h | 11 ++ .../testing/selftests/bpf/prog_tests/mptcp.c | 38 ++++ .../selftests/bpf/progs/mptcp_bpf_bkup.c | 4 +- .../selftests/bpf/progs/mptcp_bpf_burst.c | 43 ++++- .../selftests/bpf/progs/mptcp_bpf_rr.c | 2 + .../selftests/bpf/progs/mptcp_bpf_stale.c | 163 ++++++++++++++++++ 11 files changed, 339 insertions(+), 18 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_stale.c -- 2.35.3
Add get_params and set_params interfaces. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- include/net/mptcp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -XXX,XX +XXX,XX @@ struct mptcp_sched_data { struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; }; +struct mptcp_sched_params { + union { + int snd_burst; + }; +}; + struct mptcp_sched_ops { void (*data_init)(struct mptcp_sock *msk, struct mptcp_sched_data *data); int (*get_subflow)(struct mptcp_sock *msk, const struct mptcp_sched_data *data); + int (*get_params)(struct mptcp_sock *msk, + struct mptcp_sched_params *params); + int (*set_params)(struct mptcp_sock *msk, + struct mptcp_sched_params *params); char name[MPTCP_SCHED_NAME_MAX]; struct module *owner; -- 2.35.3
Add mptcp_get_snd_burst and mptcp_set_snd_burst. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- net/mptcp/protocol.h | 2 ++ net/mptcp/sched.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk); struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); int mptcp_sched_get_send(struct mptcp_sock *msk); int mptcp_sched_get_retrans(struct mptcp_sock *msk); +int mptcp_get_snd_burst(struct mptcp_sock *msk); +int mptcp_set_snd_burst(struct mptcp_sock *msk, int burst); static inline bool __tcp_can_send(const struct sock *ssk) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) msk->sched->data_init(msk, &data); return msk->sched->get_subflow(msk, &data); } + +int mptcp_get_snd_burst(struct mptcp_sock *msk) +{ + if (!msk->sched) + return msk->snd_burst; + + if (msk->sched->get_params) { + struct mptcp_sched_params params; + + msk->sched->get_params(msk, ¶ms); + return params.snd_burst; + } + return 0; +} + + +int mptcp_set_snd_burst(struct mptcp_sock *msk, int burst) +{ + if (!msk->sched) { + msk->snd_burst = burst; + return 0; + } + + if (msk->sched->set_params) { + struct mptcp_sched_params params; + + params.snd_burst = burst; + return msk->sched->set_params(msk, ¶ms); + } + return 0; + +} -- 2.35.3
Use mptcp_get_snd_burst() and mptcp_set_snd_burst() wrappers, instead of read and set msk->snd_burst directly. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- net/mptcp/protocol.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem + READ_ONCE(ssk->sk_pacing_rate) * burst, burst + wmem); - msk->snd_burst = burst; + mptcp_set_snd_burst(msk, burst); return ssk; } @@ -XXX,XX +XXX,XX @@ static void mptcp_update_post_push(struct mptcp_sock *msk, dfrag->already_sent += sent; - msk->snd_burst -= sent; + mptcp_set_snd_burst(msk, mptcp_get_snd_burst(msk) - sent); snd_nxt_new += dfrag->already_sent; @@ -XXX,XX +XXX,XX @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, } WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); - if (msk->snd_burst <= 0 || + if (mptcp_get_snd_burst(msk) <= 0 || !sk_stream_memory_free(ssk) || !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) { err = copied; @@ -XXX,XX +XXX,XX @@ bool __mptcp_retransmit_pending_data(struct sock *sk) mptcp_data_unlock(sk); msk->first_pending = rtx_head; - msk->snd_burst = 0; + mptcp_set_snd_burst(msk, 0); /* be sure to clear the "sent status" on all re-injected fragments */ list_for_each_entry(cur, &msk->rtx_queue, list) { -- 2.35.3
Add .get_params and .set_params. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- net/mptcp/sched.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ static int mptcp_sched_default_get_subflow(struct mptcp_sock *msk, return 0; } +static int mptcp_sched_default_get_params(struct mptcp_sock *msk, + struct mptcp_sched_params *params) +{ + params->snd_burst = msk->snd_burst; + return 0; +} + + +static int mptcp_sched_default_set_params(struct mptcp_sock *msk, + struct mptcp_sched_params *params) +{ + msk->snd_burst = params->snd_burst; + return 0; +} + static struct mptcp_sched_ops mptcp_sched_default = { .data_init = mptcp_sched_default_data_init, .get_subflow = mptcp_sched_default_get_subflow, + .get_params = mptcp_sched_default_get_params, + .set_params = mptcp_sched_default_set_params, .name = "default", .owner = THIS_MODULE, }; -- 2.35.3
Add write access to params->snd_burst. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- net/mptcp/bpf.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -XXX,XX +XXX,XX @@ #ifdef CONFIG_BPF_JIT extern struct bpf_struct_ops bpf_mptcp_sched_ops; -static const struct btf_type *mptcp_sched_type __read_mostly; -static u32 mptcp_sched_id; +static const struct btf_type *mptcp_subflow_type __read_mostly; +static const struct btf_type *mptcp_params_type __read_mostly; +static u32 mptcp_subflow_id, mptcp_params_id; static const struct bpf_func_proto * bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id, @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, size_t end; t = btf_type_by_id(reg->btf, reg->btf_id); - if (t != mptcp_sched_type) { - bpf_log(log, "only access to mptcp_subflow_context is supported\n"); + if (t != mptcp_subflow_type && t != mptcp_params_type) { + bpf_log(log, "only access to mptcp_sched is supported\n"); return -EACCES; } @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, case offsetof(struct mptcp_subflow_context, avg_pacing_rate): end = offsetofend(struct mptcp_subflow_context, avg_pacing_rate); break; + case offsetof(struct mptcp_sched_params, snd_burst): + end = offsetofend(struct mptcp_sched_params, snd_burst); + break; default: - bpf_log(log, "no write support to mptcp_subflow_context at off %d\n", off); + bpf_log(log, "no write support to mptcp_sched at off %d\n", off); return -EACCES; } if (off + size > end) { - bpf_log(log, "access beyond mptcp_subflow_context at off %u size %u ended at %zu", + bpf_log(log, "access beyond mptcp_sched at off %u size %u ended at %zu", off, size, end); return -EACCES; } @@ -XXX,XX +XXX,XX @@ static int bpf_mptcp_sched_init(struct btf *btf) BTF_KIND_STRUCT); if (type_id < 0) return -EINVAL; - mptcp_sched_id = type_id; - mptcp_sched_type = btf_type_by_id(btf, mptcp_sched_id); + mptcp_subflow_id = type_id; + mptcp_subflow_type = btf_type_by_id(btf, mptcp_subflow_id); + + type_id = btf_find_by_name_kind(btf, "mptcp_sched_params", + BTF_KIND_STRUCT); + if (type_id < 0) + return -EINVAL; + mptcp_params_id = type_id; + mptcp_params_type = btf_type_by_id(btf, mptcp_params_id); return 0; } -- 2.35.3
Add get_params and set_params. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- tools/testing/selftests/bpf/bpf_tcp_helpers.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -XXX,XX +XXX,XX @@ struct mptcp_sched_data { __u8 subflows; } __attribute__((preserve_access_index)); +struct mptcp_sched_params { + union { + int snd_burst; + }; +}; + struct mptcp_sched_ops { char name[MPTCP_SCHED_NAME_MAX]; @@ -XXX,XX +XXX,XX @@ struct mptcp_sched_ops { struct mptcp_sched_data *data); int (*get_subflow)(struct mptcp_sock *msk, const struct mptcp_sched_data *data); + int (*get_params)(struct mptcp_sock *msk, + struct mptcp_sched_params *params); + int (*set_params)(struct mptcp_sock *msk, + struct mptcp_sched_params *params); void *owner; }; -- 2.35.3
Return -1 when no subflow is selected. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c @@ -XXX,XX +XXX,XX @@ int BPF_STRUCT_OPS(bpf_bkup_get_subflow, struct mptcp_sock *msk, } } - if (nr != -1) + if (nr != -1) { mptcp_subflow_set_scheduled(mptcp_subflow_ctx_by_pos(data, nr), true); + return -1; + } return 0; } -- 2.35.3
Alloc and init mptcp_rr_map first in mptcp_sched_rr_init(). Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c @@ -XXX,XX +XXX,XX @@ struct { SEC("struct_ops/mptcp_sched_rr_init") void BPF_PROG(mptcp_sched_rr_init, struct mptcp_sock *msk) { + bpf_sk_storage_get(&mptcp_rr_map, msk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); } SEC("struct_ops/mptcp_sched_rr_release") -- 2.35.3
Add .get_params and .set_params. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- .../selftests/bpf/progs/mptcp_bpf_burst.c | 43 ++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c @@ -XXX,XX +XXX,XX @@ static __always_inline bool sk_stream_memory_free(const struct sock *sk) SEC("struct_ops/mptcp_sched_burst_init") void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk) { + bpf_sk_storage_get(&mptcp_burst_map, msk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); } SEC("struct_ops/mptcp_sched_burst_release") @@ -XXX,XX +XXX,XX @@ void BPF_STRUCT_OPS(bpf_burst_data_init, struct mptcp_sock *msk, mptcp_sched_data_set_contexts(msk, data); } +static int set_snd_burst(struct mptcp_sock *msk, int burst) +{ + struct mptcp_burst_storage *ptr; + + ptr = bpf_sk_storage_get(&mptcp_burst_map, msk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return -1; + + ptr->snd_burst = burst; + return 0; +} + static int bpf_burst_get_send(struct mptcp_sock *msk, const struct mptcp_sched_data *data) { struct subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - struct mptcp_burst_storage *ptr; __u32 pace, burst, wmem; __u64 linger_time; struct sock *ssk; @@ -XXX,XX +XXX,XX @@ static int bpf_burst_get_send(struct mptcp_sock *msk, subflow->avg_pacing_rate = div_u64((__u64)subflow->avg_pacing_rate * wmem + ssk->sk_pacing_rate * burst, burst + wmem); - ptr = bpf_sk_storage_get(&mptcp_burst_map, msk, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) - ptr->snd_burst = burst; + set_snd_burst(msk, burst); out: mptcp_subflow_set_scheduled(subflow, true); @@ -XXX,XX +XXX,XX @@ int BPF_STRUCT_OPS(bpf_burst_get_subflow, struct mptcp_sock *msk, return bpf_burst_get_send(msk, data); } +int BPF_STRUCT_OPS(bpf_burst_get_params, struct mptcp_sock *msk, + struct mptcp_sched_params *params) +{ + struct mptcp_burst_storage *ptr; + + ptr = bpf_sk_storage_get(&mptcp_burst_map, msk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return -1; + + params->snd_burst = ptr->snd_burst; + return 0; +} + +int BPF_STRUCT_OPS(bpf_burst_set_params, struct mptcp_sock *msk, + struct mptcp_sched_params *params) +{ + return set_snd_burst(msk, params->snd_burst); +} + SEC(".struct_ops") struct mptcp_sched_ops burst = { .init = (void *)mptcp_sched_burst_init, .release = (void *)mptcp_sched_burst_release, .data_init = (void *)bpf_burst_data_init, .get_subflow = (void *)bpf_burst_get_subflow, + .get_params = (void *)bpf_burst_get_params, + .set_params = (void *)bpf_burst_set_params, .name = "bpf_burst", }; -- 2.35.3
This patch implements the setting a subflow as stale/unstale in BPF MPTCP scheduler, named bpf_stale. The staled subflow id will be added into a map in sk_storage. Two helper mptcp_subflow_set_stale() and mptcp_subflow_clear_stale() are added. In this test, subflow 1 is set as stale in bpf_stale_data_init(). Each subflow is checked whether it's a stale one in bpf_stale_get_subflow() to select a unstale subflow to send data. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- tools/testing/selftests/bpf/bpf_tcp_helpers.h | 1 + .../selftests/bpf/progs/mptcp_bpf_stale.c | 163 ++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_stale.c diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -XXX,XX +XXX,XX @@ struct mptcp_subflow_context { unsigned long avg_pacing_rate; __u32 backup : 1; __u8 stale_count; + __u32 subflow_id; struct sock *tcp_sock; /* tcp sk backpointer */ } __attribute__((preserve_access_index)); diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_stale.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_stale.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_stale.c @@ -XXX,XX +XXX,XX @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023, SUSE. */ + +#include <linux/bpf.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +struct mptcp_stale_storage { + __u8 nr; + __u32 ids[MPTCP_SUBFLOWS_MAX]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct mptcp_stale_storage); +} mptcp_stale_map SEC(".maps"); + +static void mptcp_subflow_set_stale(struct mptcp_stale_storage *storage, + __u32 subflow_id) +{ + if (!subflow_id) + return; + + for (int i = 0; i < storage->nr && i < MPTCP_SUBFLOWS_MAX; i++) { + if (storage->ids[i] == subflow_id) + return; + } + + if (storage->nr < MPTCP_SUBFLOWS_MAX - 1) + storage->ids[storage->nr++] = subflow_id; +} + +static void mptcp_subflow_clear_stale(struct mptcp_stale_storage *storage, + __u32 subflow_id) +{ + if (!subflow_id) + return; + + for (int i = 0; i < storage->nr && i < MPTCP_SUBFLOWS_MAX; i++) { + if (storage->ids[i] == subflow_id) { + for (int j = i; j < MPTCP_SUBFLOWS_MAX - 1; j++) { + if (!storage->ids[j + 1]) + break; + storage->ids[j] = storage->ids[j + 1]; + storage->ids[j + 1] = 0; + } + storage->nr--; + return; + } + } +} + +static bool mptcp_subflow_is_stale(struct mptcp_stale_storage *storage, + __u32 subflow_id) +{ + for (int i = 0; i < storage->nr && i < MPTCP_SUBFLOWS_MAX; i++) { + if (storage->ids[i] == subflow_id) + return true; + } + + return false; +} + +static bool mptcp_subflow_is_active(struct mptcp_sched_data *data, + __u32 subflow_id) +{ + for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx_by_pos(data, i); + if (!subflow) + break; + if (subflow->subflow_id == subflow_id) + return true; + } + + return false; +} + +SEC("struct_ops/mptcp_sched_stale_init") +void BPF_PROG(mptcp_sched_stale_init, struct mptcp_sock *msk) +{ + struct mptcp_stale_storage *storage; + + storage = bpf_sk_storage_get(&mptcp_stale_map, msk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return; + + storage->nr = 0; +} + +SEC("struct_ops/mptcp_sched_stale_release") +void BPF_PROG(mptcp_sched_stale_release, struct mptcp_sock *msk) +{ + bpf_sk_storage_delete(&mptcp_stale_map, msk); +} + +void BPF_STRUCT_OPS(bpf_stale_data_init, struct mptcp_sock *msk, + struct mptcp_sched_data *data) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_stale_storage *storage; + + mptcp_sched_data_set_contexts(msk, data); + + storage = bpf_sk_storage_get(&mptcp_stale_map, msk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return; + + for (int i = 0; i < storage->nr && i < MPTCP_SUBFLOWS_MAX; i++) { + if (!mptcp_subflow_is_active(data, storage->ids[i])) + mptcp_subflow_clear_stale(storage, storage->ids[i]); + } + + subflow = mptcp_subflow_ctx_by_pos(data, 1); + if (subflow) + mptcp_subflow_set_stale(storage, subflow->subflow_id); +} + +int BPF_STRUCT_OPS(bpf_stale_get_subflow, struct mptcp_sock *msk, + const struct mptcp_sched_data *data) +{ + struct mptcp_stale_storage *storage; + int nr = -1; + + storage = bpf_sk_storage_get(&mptcp_stale_map, msk, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return -1; + + for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) { + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx_by_pos(data, i); + if (!subflow) + break; + + if (mptcp_subflow_is_stale(storage, subflow->subflow_id)) + continue; + + nr = i; + } + + if (nr != -1) { + mptcp_subflow_set_scheduled(mptcp_subflow_ctx_by_pos(data, nr), true); + return -1; + } + return 0; +} + +SEC(".struct_ops") +struct mptcp_sched_ops stale = { + .init = (void *)mptcp_sched_stale_init, + .release = (void *)mptcp_sched_stale_release, + .data_init = (void *)bpf_stale_data_init, + .get_subflow = (void *)bpf_stale_get_subflow, + .name = "bpf_stale", +}; -- 2.35.3
This patch adds the bpf_stale scheduler test: test_stale(). Use sysctl to set net.mptcp.scheduler to use this sched. Add two veth net devices to simulate the multiple addresses case. Use 'ip mptcp endpoint' command to add the new endpoint ADDR_2 to PM netlink. Send data and check bytes_sent of 'ss' output after it to make sure the data has been only sent on ADDR_1 since ADDR_2 is set as stale. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- .../testing/selftests/bpf/prog_tests/mptcp.c | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -XXX,XX +XXX,XX @@ #include "mptcp_bpf_rr.skel.h" #include "mptcp_bpf_red.skel.h" #include "mptcp_bpf_burst.skel.h" +#include "mptcp_bpf_stale.skel.h" char NS_TEST[32]; @@ -XXX,XX +XXX,XX @@ static void test_burst(void) mptcp_bpf_burst__destroy(burst_skel); } +static void test_stale(void) +{ + struct mptcp_bpf_stale *stale_skel; + int server_fd, client_fd; + struct nstoken *nstoken; + struct bpf_link *link; + + stale_skel = mptcp_bpf_stale__open_and_load(); + if (!ASSERT_OK_PTR(stale_skel, "bpf_stale__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(stale_skel->maps.stale); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { + mptcp_bpf_stale__destroy(stale_skel); + return; + } + + nstoken = sched_init("subflow", "bpf_stale"); + if (!ASSERT_OK_PTR(nstoken, "sched_init:bpf_stale")) + goto fail; + server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); + client_fd = connect_to_fd(server_fd, 0); + + send_data(server_fd, client_fd, "bpf_stale"); + ASSERT_OK(has_bytes_sent(ADDR_1), "has_bytes_sent addr_1"); + ASSERT_GT(has_bytes_sent(ADDR_2), 0, "has_bytes_sent addr_2"); + + close(client_fd); + close(server_fd); +fail: + cleanup_netns(nstoken); + bpf_link__destroy(link); + mptcp_bpf_stale__destroy(stale_skel); +} + void test_mptcp(void) { if (test__start_subtest("base")) @@ -XXX,XX +XXX,XX @@ void test_mptcp(void) test_red(); if (test__start_subtest("burst")) test_burst(); + if (test__start_subtest("stale")) + test_stale(); } -- 2.35.3
v2: - keep sched API unchanged. - use bpf_sk_storage_lookup to get snd_burst from BPF context. - applied after "add bpf_stale scheduler v3" serise. v1: There's a bug in bpf_burst. snd_burst stored in mptcp_burst_storage in BPF context is not used. msk->snd_burst is still used in kernel space. To fix this, add two new interfaces in mptcp_sched_ops to get and set scheduler's paramters from BPF context to kernel space. Geliang Tang (2): mptcp: add set/get params wrappers mptcp: add bpf_burst set/get params include/net/bpf_sk_storage.h | 7 +++++ net/core/bpf_sk_storage.c | 2 +- net/mptcp/protocol.c | 8 ++--- net/mptcp/protocol.h | 2 ++ net/mptcp/sched.c | 57 ++++++++++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 5 deletions(-) -- 2.35.3
Add two sched params wrappers: mptcp_sched_set_params() and mptcp_sched_set_params(). Use these wrappers instead of get and set msk->snd_burst directly. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- net/mptcp/protocol.c | 8 ++++---- net/mptcp/protocol.h | 2 ++ net/mptcp/sched.c | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem + READ_ONCE(ssk->sk_pacing_rate) * burst, burst + wmem); - msk->snd_burst = burst; + mptcp_sched_set_params(msk, burst); return ssk; } @@ -XXX,XX +XXX,XX @@ static void mptcp_update_post_push(struct mptcp_sock *msk, dfrag->already_sent += sent; - msk->snd_burst -= sent; + mptcp_sched_set_params(msk, mptcp_sched_get_params(msk) - sent); snd_nxt_new += dfrag->already_sent; @@ -XXX,XX +XXX,XX @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, } WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); - if (msk->snd_burst <= 0 || + if (mptcp_sched_get_params(msk) <= 0 || !sk_stream_memory_free(ssk) || !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) { err = copied; @@ -XXX,XX +XXX,XX @@ bool __mptcp_retransmit_pending_data(struct sock *sk) mptcp_data_unlock(sk); msk->first_pending = rtx_head; - msk->snd_burst = 0; + mptcp_sched_set_params(msk, 0); /* be sure to clear the "sent status" on all re-injected fragments */ list_for_each_entry(cur, &msk->rtx_queue, list) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, struct mptcp_sched_data *data); struct mptcp_subflow_context * mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos); +int mptcp_sched_get_params(struct mptcp_sock *msk); +int mptcp_sched_set_params(struct mptcp_sock *msk, int burst); static inline bool __tcp_can_send(const struct sock *ssk) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) return NULL; return data->contexts[pos]; } + +int mptcp_sched_get_params(struct mptcp_sock *msk) +{ + if (msk->sched == &mptcp_sched_default) + return msk->snd_burst; + + return 0; +} + +int mptcp_sched_set_params(struct mptcp_sock *msk, int burst) +{ + if (msk->sched == &mptcp_sched_default) { + msk->snd_burst = burst; + return 0; + } + + return 0; +} -- 2.35.3
This patch implements the set/get params interfaces for bpf_burst scheduler. Export bpf_sk_storage_lookup(), use it to get bpf_local_storage_data from sk->sk_bpf_storage, then get or set its params ptr->snd_burst. Signed-off-by: Geliang Tang <geliang.tang@suse.com> --- include/net/bpf_sk_storage.h | 7 +++++++ net/core/bpf_sk_storage.c | 2 +- net/mptcp/sched.c | 39 ++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -XXX,XX +XXX,XX @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, struct sock *sk, struct sk_buff *skb, int stg_array_type, unsigned int *res_diag_size); +struct bpf_local_storage_data * +bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit); #else static inline int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) @@ -XXX,XX +XXX,XX @@ static inline int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, { return 0; } +struct bpf_local_storage_data * +bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) +{ + return NULL; +} #endif #endif /* _BPF_SK_STORAGE_H */ diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index XXXXXXX..XXXXXXX 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -XXX,XX +XXX,XX @@ DEFINE_BPF_STORAGE_CACHE(sk_cache); -static struct bpf_local_storage_data * +struct bpf_local_storage_data * bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) { struct bpf_local_storage *sk_storage; diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -XXX,XX +XXX,XX @@ #include <linux/list.h> #include <linux/rculist.h> #include <linux/spinlock.h> +#include <net/bpf_sk_storage.h> #include "protocol.h" static DEFINE_SPINLOCK(mptcp_sched_list_lock); @@ -XXX,XX +XXX,XX @@ mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) int mptcp_sched_get_params(struct mptcp_sock *msk) { + struct sock *sk = (struct sock *)msk; + if (msk->sched == &mptcp_sched_default) return msk->snd_burst; + if (sk->sk_bpf_storage && sk->sk_bpf_storage->smap && + !strcmp(sk->sk_bpf_storage->smap->map.name, "mptcp_burst_map")) { + struct bpf_local_storage_data *sdata; + + sdata = bpf_sk_storage_lookup(sk, &sk->sk_bpf_storage->smap->map, true); + if (sdata) { + struct mptcp_burst_storage { + int snd_burst; + } *ptr; + + ptr = (struct mptcp_burst_storage *)sdata->data; + if (ptr) + return ptr->snd_burst; + } + } + return 0; } int mptcp_sched_set_params(struct mptcp_sock *msk, int burst) { + struct sock *sk = (struct sock *)msk; + if (msk->sched == &mptcp_sched_default) { msk->snd_burst = burst; return 0; } + if (sk->sk_bpf_storage && sk->sk_bpf_storage->smap && + !strcmp(sk->sk_bpf_storage->smap->map.name, "mptcp_burst_map")) { + struct bpf_local_storage_data *sdata; + + sdata = bpf_sk_storage_lookup(sk, &sk->sk_bpf_storage->smap->map, true); + if (sdata) { + struct mptcp_burst_storage { + int snd_burst; + } *ptr; + + ptr = (struct mptcp_burst_storage *)sdata->data; + if (ptr) { + ptr->snd_burst = burst; + return 0; + } + } + } + return 0; } -- 2.35.3