The mptcp protocol maintains an additional socket just to easily
invoke a few stream operations on the first subflow. One of them is
__inet_stream_connect().
We are going to remove the first subflow socket soon, so avoid
the addictional indirection via at connect time, calling directly
into the sock-level connect() ops.
No functional change intended.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
net/mptcp/protocol.c | 40 +++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 17174bdae1ca..7445a3cf8812 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3629,22 +3629,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
int err = -EINVAL;
+ struct sock *ssk;
ssock = __mptcp_nmpc_socket(msk);
if (IS_ERR(ssock))
return PTR_ERR(ssock);
inet_sk_state_store(sk, TCP_SYN_SENT);
- subflow = mptcp_subflow_ctx(ssock->sk);
+ ssk = msk->first;
+ subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
- if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
+ if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
- MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
}
if (likely(!__mptcp_check_fallback(msk)))
@@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->fastopening)
- err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
- else
- err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
- inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
+ if (!msk->fastopening)
+ lock_sock(ssk);
+
+ if (ssk->sk_state != TCP_CLOSE)
+ goto out;
+
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
+ err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
+ err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
+ if (err < 0)
+ goto out;
+
+ inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
+
+out:
+ if (!msk->fastopening)
+ release_sock(ssk);
/* on successful connect, the msk state will be moved to established by
* subflow_finish_connect()
*/
if (unlikely(err && err != -EINPROGRESS)) {
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ inet_sk_state_store(sk, inet_sk_state_load(ssk));
return err;
}
- mptcp_copy_inaddrs(sk, ssock->sk);
+ mptcp_copy_inaddrs(sk, ssk);
/* silence EINPROGRESS and let the caller inet_stream_connect
* handle the connection in progress
--
2.41.0
On Mon, 10 Jul 2023, Paolo Abeni wrote:
> The mptcp protocol maintains an additional socket just to easily
> invoke a few stream operations on the first subflow. One of them is
> __inet_stream_connect().
>
Hi Paolo -
Thanks for the series! It will be good to get rid of the confusing subflow
pointer.
> We are going to remove the first subflow socket soon, so avoid
> the addictional indirection via at connect time, calling directly
^^^^^^^^^^^
"additional" is misspelled this way in many of the series commit messages.
> into the sock-level connect() ops.
>
> No functional change intended.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/protocol.c | 40 +++++++++++++++++++++++++++++-----------
> 1 file changed, 29 insertions(+), 11 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 17174bdae1ca..7445a3cf8812 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -3629,22 +3629,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> struct mptcp_sock *msk = mptcp_sk(sk);
> struct socket *ssock;
> int err = -EINVAL;
> + struct sock *ssk;
>
> ssock = __mptcp_nmpc_socket(msk);
> if (IS_ERR(ssock))
> return PTR_ERR(ssock);
>
> inet_sk_state_store(sk, TCP_SYN_SENT);
> - subflow = mptcp_subflow_ctx(ssock->sk);
> + ssk = msk->first;
> + subflow = mptcp_subflow_ctx(ssk);
> #ifdef CONFIG_TCP_MD5SIG
> /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
> * TCP option space.
> */
> - if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
> + if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
> mptcp_subflow_early_fallback(msk, subflow);
> #endif
> - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
> - MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
> + if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
> + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
> mptcp_subflow_early_fallback(msk, subflow);
> }
> if (likely(!__mptcp_check_fallback(msk)))
> @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> /* if reaching here via the fastopen/sendmsg path, the caller already
> * acquired the subflow socket lock, too.
> */
> - if (msk->fastopening)
> - err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
> - else
> - err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
> - inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
> + if (!msk->fastopening)
> + lock_sock(ssk);
> +
> + if (ssk->sk_state != TCP_CLOSE)
> + goto out;
> +
> + if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
> + err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
> + if (err)
> + goto out;
> + }
> +
> + err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
> + if (err < 0)
> + goto out;
> +
> + inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
The above code doesn't do everything __inet_stream_connect() does. Is that
code omitted here because the caller of this function handles the timeouts
and msk-level socket states already?
- Mat
> +
> +out:
> + if (!msk->fastopening)
> + release_sock(ssk);
>
> /* on successful connect, the msk state will be moved to established by
> * subflow_finish_connect()
> */
> if (unlikely(err && err != -EINPROGRESS)) {
> - inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
> + inet_sk_state_store(sk, inet_sk_state_load(ssk));
> return err;
> }
>
> - mptcp_copy_inaddrs(sk, ssock->sk);
> + mptcp_copy_inaddrs(sk, ssk);
>
> /* silence EINPROGRESS and let the caller inet_stream_connect
> * handle the connection in progress
> --
> 2.41.0
>
>
>
On Wed, 2023-07-12 at 14:49 -0700, Mat Martineau wrote:
> On Mon, 10 Jul 2023, Paolo Abeni wrote:
>
> > The mptcp protocol maintains an additional socket just to easily
> > invoke a few stream operations on the first subflow. One of them is
> > __inet_stream_connect().
> >
>
> Hi Paolo -
>
> Thanks for the series! It will be good to get rid of the confusing subflow
> pointer.
>
> > We are going to remove the first subflow socket soon, so avoid
> > the addictional indirection via at connect time, calling directly
> ^^^^^^^^^^^
>
> "additional" is misspelled this way in many of the series commit messages.
Oops, I guess that the "many" above burns my usual "is a typo" excuse;)
> > @@ -3629,22 +3629,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> > struct mptcp_sock *msk = mptcp_sk(sk);
> > struct socket *ssock;
> > int err = -EINVAL;
> > + struct sock *ssk;
> >
> > ssock = __mptcp_nmpc_socket(msk);
> > if (IS_ERR(ssock))
> > return PTR_ERR(ssock);
> >
> > inet_sk_state_store(sk, TCP_SYN_SENT);
> > - subflow = mptcp_subflow_ctx(ssock->sk);
> > + ssk = msk->first;
> > + subflow = mptcp_subflow_ctx(ssk);
> > #ifdef CONFIG_TCP_MD5SIG
> > /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
> > * TCP option space.
> > */
> > - if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
> > + if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
> > mptcp_subflow_early_fallback(msk, subflow);
> > #endif
> > - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
> > - MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
> > + if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
> > + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
> > mptcp_subflow_early_fallback(msk, subflow);
> > }
> > if (likely(!__mptcp_check_fallback(msk)))
> > @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> > /* if reaching here via the fastopen/sendmsg path, the caller already
> > * acquired the subflow socket lock, too.
> > */
> > - if (msk->fastopening)
> > - err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
> > - else
> > - err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
> > - inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
> > + if (!msk->fastopening)
> > + lock_sock(ssk);
> > +
> > + if (ssk->sk_state != TCP_CLOSE)
> > + goto out;
> > +
> > + if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
> > + err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
> > + if (err)
> > + goto out;
> > + }
> > +
> > + err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
> > + if (err < 0)
> > + goto out;
> > +
> > + inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
>
> The above code doesn't do everything __inet_stream_connect() does. Is that
> code omitted here because the caller of this function handles the timeouts
> and msk-level socket states already?
Yes. The main point is that we had an inet_stream_connect() call in the
existing/prior code because we did not have access to a more
specific/constrained helper. Maintaining the struct socket state for
the ssock will be useless soon - we are going to drop such struct. So
the above should be all we really need.
Cheers,
Paolo
Hi Paolo, Mat,
Thank you for the series and the reviews!
On 12/07/2023 23:49, Mat Martineau wrote:
> On Mon, 10 Jul 2023, Paolo Abeni wrote:
>
>> The mptcp protocol maintains an additional socket just to easily
>> invoke a few stream operations on the first subflow. One of them is
>> __inet_stream_connect().
(...)
>> @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk,
>> struct sockaddr *uaddr, int addr_len)
>> /* if reaching here via the fastopen/sendmsg path, the caller already
>> * acquired the subflow socket lock, too.
>> */
>> - if (msk->fastopening)
>> - err = __inet_stream_connect(ssock, uaddr, addr_len,
>> O_NONBLOCK, 1);
>> - else
>> - err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
>> - inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
>> + if (!msk->fastopening)
>> + lock_sock(ssk);
>> +
>> + if (ssk->sk_state != TCP_CLOSE)
>> + goto out;
>> +
>> + if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
>> + err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
>> + if (err)
>> + goto out;
>> + }
>> +
>> + err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
>> + if (err < 0)
>> + goto out;
>> +
>> + inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
>
> The above code doesn't do everything __inet_stream_connect() does. Is
> that code omitted here because the caller of this function handles the
> timeouts and msk-level socket states already?
Is it not possible to extract the code we need from
__inet_stream_connect() to a new function in af_inet.c? Then we can call
this new function from __inet_stream_connect() and here.
I'm always a bit worry when we duplicate code from upper layers because
we could miss future modifications done in the original code (we had
that a few times with the fork and it is hard to track and maintain). I
would say that the minimum is at least to add a comment stating this
code is a duplication from somewhere else, just in case to make it clear
where we need to look at in case of issues there.
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Thu, 2023-07-13 at 11:00 +0200, Matthieu Baerts wrote:
> Hi Paolo, Mat,
>
> Thank you for the series and the reviews!
>
> On 12/07/2023 23:49, Mat Martineau wrote:
> > On Mon, 10 Jul 2023, Paolo Abeni wrote:
> >
> > > The mptcp protocol maintains an additional socket just to easily
> > > invoke a few stream operations on the first subflow. One of them is
> > > __inet_stream_connect().
>
> (...)
>
> > > @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk,
> > > struct sockaddr *uaddr, int addr_len)
> > > /* if reaching here via the fastopen/sendmsg path, the caller already
> > > * acquired the subflow socket lock, too.
> > > */
> > > - if (msk->fastopening)
> > > - err = __inet_stream_connect(ssock, uaddr, addr_len,
> > > O_NONBLOCK, 1);
> > > - else
> > > - err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
> > > - inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
> > > + if (!msk->fastopening)
> > > + lock_sock(ssk);
> > > +
> > > + if (ssk->sk_state != TCP_CLOSE)
> > > + goto out;
> > > +
> > > + if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
> > > + err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
> > > + if (err)
> > > + goto out;
> > > + }
> > > +
> > > + err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
> > > + if (err < 0)
> > > + goto out;
> > > +
> > > + inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
> >
> > The above code doesn't do everything __inet_stream_connect() does. Is
> > that code omitted here because the caller of this function handles the
> > timeouts and msk-level socket states already?
>
> Is it not possible to extract the code we need from
> __inet_stream_connect() to a new function in af_inet.c? Then we can call
> this new function from __inet_stream_connect() and here.
>
> I'm always a bit worry when we duplicate code from upper layers because
> we could miss future modifications done in the original code (we had
> that a few times with the fork and it is hard to track and maintain).
Well, that kind of problems becomes order of magnitude worse with out-
of-tree code, because nobody is aware nor interested of such code
needs.
Chances of issues here are really low:
* mptcp code is in tree
* maintainers are aware ;)
* bpf hooks should not change
* [most important topic] the dup code is extremely simple and small
All in all, it looks reasonably safe to me.
> I
> would say that the minimum is at least to add a comment stating this
> code is a duplication from somewhere else, just in case to make it clear
> where we need to look at in case of issues there.
I'll add the comment in v2.
Thanks,
Paolo
Hi Paolo,
On 14/07/2023 10:38, Paolo Abeni wrote:
> On Thu, 2023-07-13 at 11:00 +0200, Matthieu Baerts wrote:
>> Hi Paolo, Mat,
>>
>> Thank you for the series and the reviews!
>>
>> On 12/07/2023 23:49, Mat Martineau wrote:
>>> On Mon, 10 Jul 2023, Paolo Abeni wrote:
>>>
>>>> The mptcp protocol maintains an additional socket just to easily
>>>> invoke a few stream operations on the first subflow. One of them is
>>>> __inet_stream_connect().
>>
>> (...)
>>
>>>> @@ -3653,21 +3655,37 @@ static int mptcp_connect(struct sock *sk,
>>>> struct sockaddr *uaddr, int addr_len)
>>>> /* if reaching here via the fastopen/sendmsg path, the caller already
>>>> * acquired the subflow socket lock, too.
>>>> */
>>>> - if (msk->fastopening)
>>>> - err = __inet_stream_connect(ssock, uaddr, addr_len,
>>>> O_NONBLOCK, 1);
>>>> - else
>>>> - err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
>>>> - inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
>>>> + if (!msk->fastopening)
>>>> + lock_sock(ssk);
>>>> +
>>>> + if (ssk->sk_state != TCP_CLOSE)
>>>> + goto out;
>>>> +
>>>> + if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
>>>> + err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
>>>> + if (err)
>>>> + goto out;
>>>> + }
>>>> +
>>>> + err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
>>>> + if (err < 0)
>>>> + goto out;
>>>> +
>>>> + inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect;
>>>
>>> The above code doesn't do everything __inet_stream_connect() does. Is
>>> that code omitted here because the caller of this function handles the
>>> timeouts and msk-level socket states already?
>>
>> Is it not possible to extract the code we need from
>> __inet_stream_connect() to a new function in af_inet.c? Then we can call
>> this new function from __inet_stream_connect() and here.
>>
>> I'm always a bit worry when we duplicate code from upper layers because
>> we could miss future modifications done in the original code (we had
>> that a few times with the fork and it is hard to track and maintain).
>
> Well, that kind of problems becomes order of magnitude worse with out-
> of-tree code, because nobody is aware nor interested of such code
> needs.
>
> Chances of issues here are really low:
> * mptcp code is in tree
> * maintainers are aware ;)
> * bpf hooks should not change
> * [most important topic] the dup code is extremely simple and small
>
> All in all, it looks reasonably safe to me.
Thank you for the explanations!
If you think this is safe for you, that's fine for me :)
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
© 2016 - 2026 Red Hat, Inc.