[PATCH mptcp-next v7 4/8] mptcp: add sched in mptcp_sock

Geliang Tang posted 8 patches 3 years, 10 months ago
Maintainers: Mat Martineau <mathew.j.martineau@linux.intel.com>, Yonghong Song <yhs@fb.com>, Paolo Abeni <pabeni@redhat.com>, John Fastabend <john.fastabend@gmail.com>, Andrii Nakryiko <andrii@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Alexei Starovoitov <ast@kernel.org>, "David S. Miller" <davem@davemloft.net>, KP Singh <kpsingh@kernel.org>, Matthieu Baerts <matthieu.baerts@tessares.net>, Jonathan Corbet <corbet@lwn.net>, Shuah Khan <shuah@kernel.org>, Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>, Daniel Borkmann <daniel@iogearbox.net>
There is a newer version of this series
[PATCH mptcp-next v7 4/8] mptcp: add sched in mptcp_sock
Posted by Geliang Tang 3 years, 10 months ago
This patch adds a new struct member sched in struct mptcp_sock.
And two helpers mptcp_init_sched() and mptcp_release_sched() to
init and release it.

Init it with the sysctl scheduler in mptcp_init_sock(), copy the
scheduler from the parent in mptcp_sk_clone(), and release it in
__mptcp_destroy_sock().

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c |  4 ++++
 net/mptcp/protocol.h |  4 ++++
 net/mptcp/sched.c    | 27 +++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2c684034fe7a..82b3846147a6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2662,6 +2662,8 @@ static int mptcp_init_sock(struct sock *sk)
 	 * propagate the correct value
 	 */
 	mptcp_ca_reset(sk);
+	mptcp_init_sched(mptcp_sk(sk),
+			 mptcp_sched_find(net, mptcp_get_scheduler(net)));
 
 	sk_sockets_allocated_inc(sk);
 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
@@ -2817,6 +2819,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sk_stop_timer(sk, &sk->sk_timer);
 	mptcp_data_unlock(sk);
 	msk->pm.status = 0;
+	mptcp_release_sched(msk);
 
 	/* clears msk->subflow, allowing the following loop to close
 	 * even the initial subflow
@@ -2994,6 +2997,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	msk->snd_una = msk->write_seq;
 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
 	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+	mptcp_init_sched(msk, mptcp_sk(sk)->sched);
 
 	if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
 		msk->can_ack = true;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index dd4a2afdb38b..81c35af7ade5 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -288,6 +288,7 @@ struct mptcp_sock {
 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
 	struct sock	*first;
 	struct mptcp_pm_data	pm;
+	struct mptcp_sched_ops	*sched;
 	struct {
 		u32	space;	/* bytes copied in last measurement window */
 		u32	copied; /* bytes copied in this measurement window */
@@ -618,6 +619,9 @@ void mptcp_unregister_scheduler(const struct net *net,
 void mptcp_sched_init(void);
 struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
 void mptcp_sched_data_init(struct sock *sk);
+int mptcp_init_sched(struct mptcp_sock *msk,
+		     struct mptcp_sched_ops *sched);
+void mptcp_release_sched(struct mptcp_sock *msk);
 
 static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 1fb3dd24d6ff..9b3a3157111d 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -125,3 +125,30 @@ void mptcp_sched_data_init(struct sock *sk)
 {
 	mptcp_register_scheduler(sock_net(sk), &mptcp_sched_default);
 }
+
+int mptcp_init_sched(struct mptcp_sock *msk,
+		     struct mptcp_sched_ops *sched)
+{
+	if (!sched)
+		msk->sched = &mptcp_sched_default;
+	else
+		msk->sched = sched;
+
+	if (!bpf_try_module_get(msk->sched, msk->sched->owner))
+		return -EBUSY;
+
+	if (msk->sched->init)
+		msk->sched->init(msk);
+
+	pr_debug("sched=%s", msk->sched->name);
+
+	return 0;
+}
+
+void mptcp_release_sched(struct mptcp_sock *msk)
+{
+	if (msk->sched && msk->sched->release)
+		msk->sched->release(msk);
+	bpf_module_put(msk->sched, msk->sched->owner);
+	msk->sched = NULL;
+}
-- 
2.34.1


Re: [PATCH mptcp-next v7 4/8] mptcp: add sched in mptcp_sock
Posted by Mat Martineau 3 years, 10 months ago
On Mon, 28 Mar 2022, Geliang Tang wrote:

> This patch adds a new struct member sched in struct mptcp_sock.
> And two helpers mptcp_init_sched() and mptcp_release_sched() to
> init and release it.
>
> Init it with the sysctl scheduler in mptcp_init_sock(), copy the
> scheduler from the parent in mptcp_sk_clone(), and release it in
> __mptcp_destroy_sock().
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c |  4 ++++
> net/mptcp/protocol.h |  4 ++++
> net/mptcp/sched.c    | 27 +++++++++++++++++++++++++++
> 3 files changed, 35 insertions(+)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 2c684034fe7a..82b3846147a6 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -2662,6 +2662,8 @@ static int mptcp_init_sock(struct sock *sk)
> 	 * propagate the correct value
> 	 */
> 	mptcp_ca_reset(sk);
> +	mptcp_init_sched(mptcp_sk(sk),
> +			 mptcp_sched_find(net, mptcp_get_scheduler(net)));
>
> 	sk_sockets_allocated_inc(sk);
> 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
> @@ -2817,6 +2819,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
> 	sk_stop_timer(sk, &sk->sk_timer);
> 	mptcp_data_unlock(sk);
> 	msk->pm.status = 0;
> +	mptcp_release_sched(msk);
>
> 	/* clears msk->subflow, allowing the following loop to close
> 	 * even the initial subflow
> @@ -2994,6 +2997,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
> 	msk->snd_una = msk->write_seq;
> 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
> 	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
> +	mptcp_init_sched(msk, mptcp_sk(sk)->sched);
>
> 	if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
> 		msk->can_ack = true;
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index dd4a2afdb38b..81c35af7ade5 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -288,6 +288,7 @@ struct mptcp_sock {
> 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
> 	struct sock	*first;
> 	struct mptcp_pm_data	pm;
> +	struct mptcp_sched_ops	*sched;
> 	struct {
> 		u32	space;	/* bytes copied in last measurement window */
> 		u32	copied; /* bytes copied in this measurement window */
> @@ -618,6 +619,9 @@ void mptcp_unregister_scheduler(const struct net *net,
> void mptcp_sched_init(void);
> struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
> void mptcp_sched_data_init(struct sock *sk);
> +int mptcp_init_sched(struct mptcp_sock *msk,
> +		     struct mptcp_sched_ops *sched);
> +void mptcp_release_sched(struct mptcp_sock *msk);
>
> static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
> {
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 1fb3dd24d6ff..9b3a3157111d 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -125,3 +125,30 @@ void mptcp_sched_data_init(struct sock *sk)
> {
> 	mptcp_register_scheduler(sock_net(sk), &mptcp_sched_default);
> }
> +
> +int mptcp_init_sched(struct mptcp_sock *msk,
> +		     struct mptcp_sched_ops *sched)
> +{
> +	if (!sched)
> +		msk->sched = &mptcp_sched_default;
> +	else
> +		msk->sched = sched;
> +
> +	if (!bpf_try_module_get(msk->sched, msk->sched->owner))
> +		return -EBUSY;

The bpf_try_module_get() should be before msk->sched is assigned.

> +
> +	if (msk->sched->init)
> +		msk->sched->init(msk);
> +
> +	pr_debug("sched=%s", msk->sched->name);
> +
> +	return 0;
> +}
> +
> +void mptcp_release_sched(struct mptcp_sock *msk)
> +{
> +	if (msk->sched && msk->sched->release)
> +		msk->sched->release(msk);

Looks like the null check here is already addressed in the squash-to 
patch.

> +	bpf_module_put(msk->sched, msk->sched->owner);

Would be better to store a local pointer to the sched struct and clear 
msk->sched before doing the bpf_module_put() call. With the RCU grace 
period it's not likely to be a problem but might as well do the put last.

> +	msk->sched = NULL;
> +}
> -- 
> 2.34.1
>
>
>

--
Mat Martineau
Intel

Re: [PATCH mptcp-next v7 4/8] mptcp: add sched in mptcp_sock
Posted by Florian Westphal 3 years, 10 months ago
Geliang Tang <geliang.tang@suse.com> wrote:
> +void mptcp_release_sched(struct mptcp_sock *msk)
> +{
> +	if (msk->sched && msk->sched->release)
> +		msk->sched->release(msk);
> +	bpf_module_put(msk->sched, msk->sched->owner);
> +	msk->sched = NULL;

Strange.  First line makes sure we don't null deref on msk->sched ==
NULL, but bpf_module_put() passes msk->sched->owner?

Does bpf_module_put need to live in a 
if (msk->sched) { branch or is the null test redundant?