On Mon, 28 Mar 2022, Geliang Tang wrote:
> This patch adds a new struct member sched in struct mptcp_sock.
> And two helpers mptcp_init_sched() and mptcp_release_sched() to
> init and release it.
>
> Init it with the sysctl scheduler in mptcp_init_sock(), copy the
> scheduler from the parent in mptcp_sk_clone(), and release it in
> __mptcp_destroy_sock().
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 4 ++++
> net/mptcp/protocol.h | 4 ++++
> net/mptcp/sched.c | 27 +++++++++++++++++++++++++++
> 3 files changed, 35 insertions(+)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 2c684034fe7a..82b3846147a6 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -2662,6 +2662,8 @@ static int mptcp_init_sock(struct sock *sk)
> * propagate the correct value
> */
> mptcp_ca_reset(sk);
> + mptcp_init_sched(mptcp_sk(sk),
> + mptcp_sched_find(net, mptcp_get_scheduler(net)));
>
> sk_sockets_allocated_inc(sk);
> sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
> @@ -2817,6 +2819,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
> sk_stop_timer(sk, &sk->sk_timer);
> mptcp_data_unlock(sk);
> msk->pm.status = 0;
> + mptcp_release_sched(msk);
>
> /* clears msk->subflow, allowing the following loop to close
> * even the initial subflow
> @@ -2994,6 +2997,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
> msk->snd_una = msk->write_seq;
> msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
> msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
> + mptcp_init_sched(msk, mptcp_sk(sk)->sched);
>
> if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
> msk->can_ack = true;
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index dd4a2afdb38b..81c35af7ade5 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -288,6 +288,7 @@ struct mptcp_sock {
> struct socket *subflow; /* outgoing connect/listener/!mp_capable */
> struct sock *first;
> struct mptcp_pm_data pm;
> + struct mptcp_sched_ops *sched;
> struct {
> u32 space; /* bytes copied in last measurement window */
> u32 copied; /* bytes copied in this measurement window */
> @@ -618,6 +619,9 @@ void mptcp_unregister_scheduler(const struct net *net,
> void mptcp_sched_init(void);
> struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
> void mptcp_sched_data_init(struct sock *sk);
> +int mptcp_init_sched(struct mptcp_sock *msk,
> + struct mptcp_sched_ops *sched);
> +void mptcp_release_sched(struct mptcp_sock *msk);
>
> static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
> {
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 1fb3dd24d6ff..9b3a3157111d 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -125,3 +125,30 @@ void mptcp_sched_data_init(struct sock *sk)
> {
> mptcp_register_scheduler(sock_net(sk), &mptcp_sched_default);
> }
> +
> +int mptcp_init_sched(struct mptcp_sock *msk,
> + struct mptcp_sched_ops *sched)
> +{
> + if (!sched)
> + msk->sched = &mptcp_sched_default;
> + else
> + msk->sched = sched;
> +
> + if (!bpf_try_module_get(msk->sched, msk->sched->owner))
> + return -EBUSY;
The bpf_try_module_get() call should happen before msk->sched is assigned, so
msk->sched isn't left pointing at a scheduler we failed to get a reference on.
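Something along these lines (just a sketch, not tested):

int mptcp_init_sched(struct mptcp_sock *msk,
                     struct mptcp_sched_ops *sched)
{
        if (!sched)
                sched = &mptcp_sched_default;

        /* take the module reference before publishing the pointer */
        if (!bpf_try_module_get(sched, sched->owner))
                return -EBUSY;

        msk->sched = sched;

        if (msk->sched->init)
                msk->sched->init(msk);

        pr_debug("sched=%s", msk->sched->name);

        return 0;
}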
> +
> + if (msk->sched->init)
> + msk->sched->init(msk);
> +
> + pr_debug("sched=%s", msk->sched->name);
> +
> + return 0;
> +}
> +
> +void mptcp_release_sched(struct mptcp_sock *msk)
> +{
> + if (msk->sched && msk->sched->release)
> + msk->sched->release(msk);
Looks like the NULL check here is already addressed in the squash-to patch.
> + bpf_module_put(msk->sched, msk->sched->owner);
It would be better to store a local pointer to the sched struct and clear
msk->sched before doing the bpf_module_put() call. With the RCU grace
period it's unlikely to be a problem, but might as well do the put last.
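Roughly like this (untested sketch, assuming the NULL check from the squash-to
patch is kept):

void mptcp_release_sched(struct mptcp_sock *msk)
{
        struct mptcp_sched_ops *sched = msk->sched;

        /* clear msk->sched first so the put is the last thing we do */
        msk->sched = NULL;
        if (!sched)
                return;

        if (sched->release)
                sched->release(msk);

        bpf_module_put(sched, sched->owner);
}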
> + msk->sched = NULL;
> +}
> --
> 2.34.1
--
Mat Martineau
Intel