[PATCH mptcp-next 03/14] mptcp: avoid additional __inet_stream_connect() call

Paolo Abeni posted 14 patches 2 years, 7 months ago
Maintainers: "David S. Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, David Ahern <dsahern@kernel.org>, Matthieu Baerts <matthieu.baerts@tessares.net>, Mat Martineau <martineau@kernel.org>, Geliang Tang <geliang.tang@suse.com>
There is a newer version of this series
[PATCH mptcp-next 03/14] mptcp: avoid additional __inet_stream_connect() call
Posted by Paolo Abeni 2 years, 7 months ago
The mptcp protocol maintains an additional socket just to easily
invoke a few stream operations on the first subflow. One of them is
__inet_stream_connect().

We are going to remove the first subflow socket soon, so avoid
the addictional indirection via at connect time, calling directly
into the sock-level connect() ops.

No functional change intended.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 17174bdae1ca..7445a3cf8812 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3629,22 +3629,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct socket *ssock;
 	int err = -EINVAL;
+	struct sock *ssk;
 
 	ssock = __mptcp_nmpc_socket(msk);
 	if (IS_ERR(ssock))
 		return PTR_ERR(ssock);
 
 	inet_sk_state_store(sk, TCP_SYN_SENT);
-	subflow = mptcp_subflow_ctx(ssock->sk);
+	ssk = msk->first;
+	subflow = mptcp_subflow_ctx(ssk);
 #ifdef CONFIG_TCP_MD5SIG
 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
 	 * TCP option space.
 	 */
-	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
+	if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
 		mptcp_subflow_early_fallback(msk, subflow);
 #endif
-	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
-		MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
+	if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
+		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
 		mptcp_subflow_early_fallback(msk, subflow);
 	}
 	if (likely(!__mptcp_check_fallback(msk)))
@@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	/* if reaching here via the fastopen/sendmsg path, the caller already
 	 * acquired the subflow socket lock, too.
 	 */
-	if (msk->fastopening)
-		err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
-	else
-		err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
-	inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
+	if (!msk->fastopening)
+		lock_sock(ssk);
+
+	if (ssk->sk_state != TCP_CLOSE)
+		goto out;
+
+	if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
+		err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
+		if (err)
+			goto out;
+	}
+
+	err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
+	if (err < 0)
+		goto out;
+
+	inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
+
+out:
+	if (!msk->fastopening)
+		release_sock(ssk);
 
 	/* on successful connect, the msk state will be moved to established by
 	 * subflow_finish_connect()
 	 */
 	if (unlikely(err && err != -EINPROGRESS)) {
-		inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+		inet_sk_state_store(sk, inet_sk_state_load(ssk));
 		return err;
 	}
 
-	mptcp_copy_inaddrs(sk, ssock->sk);
+	mptcp_copy_inaddrs(sk, ssk);
 
 	/* silence EINPROGRESS and let the caller inet_stream_connect
 	 * handle the connection in progress
-- 
2.41.0
Re: [PATCH mptcp-next 03/14] mptcp: avoid additional __inet_stream_connect() call
Posted by Mat Martineau 2 years, 7 months ago
On Mon, 10 Jul 2023, Paolo Abeni wrote:

> The mptcp protocol maintains an additional socket just to easily
> invoke a few stream operations on the first subflow. One of them is
> __inet_stream_connect().
>

Hi Paolo -

Thanks for the series! It will be good to get rid of the confusing subflow 
pointer.

> We are going to remove the first subflow socket soon, so avoid
> the addictional indirection via at connect time, calling directly
       ^^^^^^^^^^^

"additional" is misspelled this way in many of the series commit messages.

> into the sock-level connect() ops.
>
> No functional change intended.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/protocol.c | 40 +++++++++++++++++++++++++++++-----------
> 1 file changed, 29 insertions(+), 11 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 17174bdae1ca..7445a3cf8812 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -3629,22 +3629,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> 	struct socket *ssock;
> 	int err = -EINVAL;
> +	struct sock *ssk;
>
> 	ssock = __mptcp_nmpc_socket(msk);
> 	if (IS_ERR(ssock))
> 		return PTR_ERR(ssock);
>
> 	inet_sk_state_store(sk, TCP_SYN_SENT);
> -	subflow = mptcp_subflow_ctx(ssock->sk);
> +	ssk = msk->first;
> +	subflow = mptcp_subflow_ctx(ssk);
> #ifdef CONFIG_TCP_MD5SIG
> 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
> 	 * TCP option space.
> 	 */
> -	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
> +	if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
> 		mptcp_subflow_early_fallback(msk, subflow);
> #endif
> -	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
> -		MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
> +	if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
> +		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
> 		mptcp_subflow_early_fallback(msk, subflow);
> 	}
> 	if (likely(!__mptcp_check_fallback(msk)))
> @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> 	/* if reaching here via the fastopen/sendmsg path, the caller already
> 	 * acquired the subflow socket lock, too.
> 	 */
> -	if (msk->fastopening)
> -		err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
> -	else
> -		err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
> -	inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
> +	if (!msk->fastopening)
> +		lock_sock(ssk);
> +
> +	if (ssk->sk_state != TCP_CLOSE)
> +		goto out;
> +
> +	if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
> +		err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
> +		if (err)
> +			goto out;
> +	}
> +
> +	err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
> +	if (err < 0)
> +		goto out;
> +
> +	inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;

The above code doesn't do everything __inet_stream_connect() does. Is that 
code omitted here because the caller of this function handles the timeouts 
and msk-level socket states already?

- Mat

> +
> +out:
> +	if (!msk->fastopening)
> +		release_sock(ssk);
>
> 	/* on successful connect, the msk state will be moved to established by
> 	 * subflow_finish_connect()
> 	 */
> 	if (unlikely(err && err != -EINPROGRESS)) {
> -		inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
> +		inet_sk_state_store(sk, inet_sk_state_load(ssk));
> 		return err;
> 	}
>
> -	mptcp_copy_inaddrs(sk, ssock->sk);
> +	mptcp_copy_inaddrs(sk, ssk);
>
> 	/* silence EINPROGRESS and let the caller inet_stream_connect
> 	 * handle the connection in progress
> -- 
> 2.41.0
>
>
>
Re: [PATCH mptcp-next 03/14] mptcp: avoid additional __inet_stream_connect() call
Posted by Paolo Abeni 2 years, 6 months ago
On Wed, 2023-07-12 at 14:49 -0700, Mat Martineau wrote:
> On Mon, 10 Jul 2023, Paolo Abeni wrote:
> 
> > The mptcp protocol maintains an additional socket just to easily
> > invoke a few stream operations on the first subflow. One of them is
> > __inet_stream_connect().
> > 
> 
> Hi Paolo -
> 
> Thanks for the series! It will be good to get rid of the confusing subflow 
> pointer.
> 
> > We are going to remove the first subflow socket soon, so avoid
> > the addictional indirection via at connect time, calling directly
>        ^^^^^^^^^^^
> 
> "additional" is misspelled this way in many of the series commit messages.

Oops, I guess that the "many" above burns my usual "is a typo" excuse;)

> > @@ -3629,22 +3629,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> > 	struct mptcp_sock *msk = mptcp_sk(sk);
> > 	struct socket *ssock;
> > 	int err = -EINVAL;
> > +	struct sock *ssk;
> > 
> > 	ssock = __mptcp_nmpc_socket(msk);
> > 	if (IS_ERR(ssock))
> > 		return PTR_ERR(ssock);
> > 
> > 	inet_sk_state_store(sk, TCP_SYN_SENT);
> > -	subflow = mptcp_subflow_ctx(ssock->sk);
> > +	ssk = msk->first;
> > +	subflow = mptcp_subflow_ctx(ssk);
> > #ifdef CONFIG_TCP_MD5SIG
> > 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
> > 	 * TCP option space.
> > 	 */
> > -	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
> > +	if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
> > 		mptcp_subflow_early_fallback(msk, subflow);
> > #endif
> > -	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
> > -		MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
> > +	if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
> > +		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
> > 		mptcp_subflow_early_fallback(msk, subflow);
> > 	}
> > 	if (likely(!__mptcp_check_fallback(msk)))
> > @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> > 	/* if reaching here via the fastopen/sendmsg path, the caller already
> > 	 * acquired the subflow socket lock, too.
> > 	 */
> > -	if (msk->fastopening)
> > -		err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
> > -	else
> > -		err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
> > -	inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
> > +	if (!msk->fastopening)
> > +		lock_sock(ssk);
> > +
> > +	if (ssk->sk_state != TCP_CLOSE)
> > +		goto out;
> > +
> > +	if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
> > +		err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
> > +		if (err)
> > +			goto out;
> > +	}
> > +
> > +	err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
> > +	if (err < 0)
> > +		goto out;
> > +
> > +	inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
> 
> The above code doesn't do everything __inet_stream_connect() does. Is that 
> code omitted here because the caller of this function handles the timeouts 
> and msk-level socket states already?

Yes. The main point is that we had an inet_stream_connect() call in the
existing/prior code because we did not have access to a more
specific/constrained helper. Maintaining the struct socket state for
the ssock will be useless soon - we are going to drop such struct. So
the above should be all we really need.


Cheers,

Paolo
Re: [PATCH mptcp-next 03/14] mptcp: avoid additional __inet_stream_connect() call
Posted by Matthieu Baerts 2 years, 7 months ago
Hi Paolo, Mat,

Thank you for the series and the reviews!

On 12/07/2023 23:49, Mat Martineau wrote:
> On Mon, 10 Jul 2023, Paolo Abeni wrote:
> 
>> The mptcp protocol maintains an additional socket just to easily
>> invoke a few stream operations on the first subflow. One of them is
>> __inet_stream_connect().

(...)

>> @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk,
>> struct sockaddr *uaddr, int addr_len)
>>     /* if reaching here via the fastopen/sendmsg path, the caller already
>>      * acquired the subflow socket lock, too.
>>      */
>> -    if (msk->fastopening)
>> -        err = __inet_stream_connect(ssock, uaddr, addr_len,
>> O_NONBLOCK, 1);
>> -    else
>> -        err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
>> -    inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
>> +    if (!msk->fastopening)
>> +        lock_sock(ssk);
>> +
>> +    if (ssk->sk_state != TCP_CLOSE)
>> +        goto out;
>> +
>> +    if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
>> +        err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
>> +        if (err)
>> +            goto out;
>> +    }
>> +
>> +    err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
>> +    if (err < 0)
>> +        goto out;
>> +
>> +    inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
> 
> The above code doesn't do everything __inet_stream_connect() does. Is
> that code omitted here because the caller of this function handles the
> timeouts and msk-level socket states already?

Is it not possible to extract the code we need from
__inet_stream_connect() to a new function in af_inet.c? Then we can call
this new function from __inet_stream_connect() and here.

I'm always a bit worry when we duplicate code from upper layers because
we could miss future modifications done in the original code (we had
that a few times with the fork and it is hard to track and maintain). I
would say that the minimum is at least to add a comment stating this
code is a duplication from somewhere else, just in case to make it clear
where we need to look at in case of issues there.

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
Re: [PATCH mptcp-next 03/14] mptcp: avoid additional __inet_stream_connect() call
Posted by Paolo Abeni 2 years, 6 months ago
On Thu, 2023-07-13 at 11:00 +0200, Matthieu Baerts wrote:
> Hi Paolo, Mat,
> 
> Thank you for the series and the reviews!
> 
> On 12/07/2023 23:49, Mat Martineau wrote:
> > On Mon, 10 Jul 2023, Paolo Abeni wrote:
> > 
> > > The mptcp protocol maintains an additional socket just to easily
> > > invoke a few stream operations on the first subflow. One of them is
> > > __inet_stream_connect().
> 
> (...)
> 
> > > @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk,
> > > struct sockaddr *uaddr, int addr_len)
> > >     /* if reaching here via the fastopen/sendmsg path, the caller already
> > >      * acquired the subflow socket lock, too.
> > >      */
> > > -    if (msk->fastopening)
> > > -        err = __inet_stream_connect(ssock, uaddr, addr_len,
> > > O_NONBLOCK, 1);
> > > -    else
> > > -        err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
> > > -    inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
> > > +    if (!msk->fastopening)
> > > +        lock_sock(ssk);
> > > +
> > > +    if (ssk->sk_state != TCP_CLOSE)
> > > +        goto out;
> > > +
> > > +    if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
> > > +        err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
> > > +        if (err)
> > > +            goto out;
> > > +    }
> > > +
> > > +    err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
> > > +    if (err < 0)
> > > +        goto out;
> > > +
> > > +    inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
> > 
> > The above code doesn't do everything __inet_stream_connect() does. Is
> > that code omitted here because the caller of this function handles the
> > timeouts and msk-level socket states already?
> 
> Is it not possible to extract the code we need from
> __inet_stream_connect() to a new function in af_inet.c? Then we can call
> this new function from __inet_stream_connect() and here.
> 
> I'm always a bit worry when we duplicate code from upper layers because
> we could miss future modifications done in the original code (we had
> that a few times with the fork and it is hard to track and maintain).

Well, that kind of problems becomes order of magnitude worse with out-
of-tree code, because nobody is aware nor interested of such code
needs.

Chances of issues here are really low:
* mptcp code is in tree 
* maintainers are aware ;)
* bpf hooks should not change
* [most important topic] the dup code is extremely simple and small

All in all, it looks reasonably safe to me.

> I
> would say that the minimum is at least to add a comment stating this
> code is a duplication from somewhere else, just in case to make it clear
> where we need to look at in case of issues there.

I'll add the comment in v2.

Thanks,

Paolo
Re: [PATCH mptcp-next 03/14] mptcp: avoid additional __inet_stream_connect() call
Posted by Matthieu Baerts 2 years, 6 months ago
Hi Paolo,

On 14/07/2023 10:38, Paolo Abeni wrote:
> On Thu, 2023-07-13 at 11:00 +0200, Matthieu Baerts wrote:
>> Hi Paolo, Mat,
>>
>> Thank you for the series and the reviews!
>>
>> On 12/07/2023 23:49, Mat Martineau wrote:
>>> On Mon, 10 Jul 2023, Paolo Abeni wrote:
>>>
>>>> The mptcp protocol maintains an additional socket just to easily
>>>> invoke a few stream operations on the first subflow. One of them is
>>>> __inet_stream_connect().
>>
>> (...)
>>
>>>> @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk,
>>>> struct sockaddr *uaddr, int addr_len)
>>>>     /* if reaching here via the fastopen/sendmsg path, the caller already
>>>>      * acquired the subflow socket lock, too.
>>>>      */
>>>> -    if (msk->fastopening)
>>>> -        err = __inet_stream_connect(ssock, uaddr, addr_len,
>>>> O_NONBLOCK, 1);
>>>> -    else
>>>> -        err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
>>>> -    inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
>>>> +    if (!msk->fastopening)
>>>> +        lock_sock(ssk);
>>>> +
>>>> +    if (ssk->sk_state != TCP_CLOSE)
>>>> +        goto out;
>>>> +
>>>> +    if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
>>>> +        err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
>>>> +        if (err)
>>>> +            goto out;
>>>> +    }
>>>> +
>>>> +    err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
>>>> +    if (err < 0)
>>>> +        goto out;
>>>> +
>>>> +    inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
>>>
>>> The above code doesn't do everything __inet_stream_connect() does. Is
>>> that code omitted here because the caller of this function handles the
>>> timeouts and msk-level socket states already?
>>
>> Is it not possible to extract the code we need from
>> __inet_stream_connect() to a new function in af_inet.c? Then we can call
>> this new function from __inet_stream_connect() and here.
>>
>> I'm always a bit worry when we duplicate code from upper layers because
>> we could miss future modifications done in the original code (we had
>> that a few times with the fork and it is hard to track and maintain).
> 
> Well, that kind of problems becomes order of magnitude worse with out-
> of-tree code, because nobody is aware nor interested of such code
> needs.
> 
> Chances of issues here are really low:
> * mptcp code is in tree 
> * maintainers are aware ;)
> * bpf hooks should not change
> * [most important topic] the dup code is extremely simple and small
> 
> All in all, it looks reasonably safe to me.

Thank you for the explanations!

If you think this is safe for you, that's fine for me :)

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net