[PATCH mptcp-net v3] mptcp: fix abba deadlock on fastopen

Paolo Abeni posted 1 patch 1 year, 6 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/multipath-tcp/mptcp_net-next tags/patchew/7c61250080c052b9dfb42a023c85dc36d4450daf.1665584136.git.pabeni@redhat.com
Maintainers: Mat Martineau <mathew.j.martineau@linux.intel.com>, Matthieu Baerts <matthieu.baerts@tessares.net>, "David S. Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, Dmytro Shytyi <dmytro@shytyi.net>, Benjamin Hesmans <benjamin.hesmans@tessares.net>
net/mptcp/protocol.c | 49 ++++++++++++++++++++++++++++++--------------
1 file changed, 34 insertions(+), 15 deletions(-)
[PATCH mptcp-net v3] mptcp: fix abba deadlock on fastopen
Posted by Paolo Abeni 1 year, 6 months ago
Our CI reported lockdep splat in the fastopen code:
 ======================================================
 WARNING: possible circular locking dependency detected
 6.0.0.mptcp_f5e8bfe9878d+ #1558 Not tainted
 ------------------------------------------------------
 packetdrill/1071 is trying to acquire lock:
 ffff8881bd198140 (sk_lock-AF_INET){+.+.}-{0:0}, at: inet_wait_for_connect+0x19c/0x310

 but task is already holding lock:
 ffff8881b8346540 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0xfdf/0x1740

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (k-sk_lock-AF_INET){+.+.}-{0:0}:
        __lock_acquire+0xb6d/0x1860
        lock_acquire+0x1d8/0x620
        lock_sock_nested+0x37/0xd0
        inet_stream_connect+0x3f/0xa0
        mptcp_connect+0x411/0x800
        __inet_stream_connect+0x3ab/0x800
        mptcp_stream_connect+0xac/0x110
        __sys_connect+0x101/0x130
        __x64_sys_connect+0x6e/0xb0
        do_syscall_64+0x59/0x90
        entry_SYSCALL_64_after_hwframe+0x63/0xcd

 -> #0 (sk_lock-AF_INET){+.+.}-{0:0}:
        check_prev_add+0x15e/0x2110
        validate_chain+0xace/0xdf0
        __lock_acquire+0xb6d/0x1860
        lock_acquire+0x1d8/0x620
        lock_sock_nested+0x37/0xd0
        inet_wait_for_connect+0x19c/0x310
        __inet_stream_connect+0x26c/0x800
        tcp_sendmsg_fastopen+0x341/0x650
        mptcp_sendmsg+0x109d/0x1740
        sock_sendmsg+0xe1/0x120
        __sys_sendto+0x1c7/0x2a0
        __x64_sys_sendto+0xdc/0x1b0
        do_syscall_64+0x59/0x90
        entry_SYSCALL_64_after_hwframe+0x63/0xcd

 other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(k-sk_lock-AF_INET);
                                lock(sk_lock-AF_INET);
                                lock(k-sk_lock-AF_INET);
   lock(sk_lock-AF_INET);

  *** DEADLOCK ***

 1 lock held by packetdrill/1071:
  #0: ffff8881b8346540 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0xfdf/0x1740
 ======================================================

The problem is caused by the blocking inet_wait_for_connect() releasing
and re-acquiring the msk socket lock while the subflow socket lock is
still held and the MPTCP socket requires that the msk socket lock must
be acquired before the subflow socket lock.

Address the issue always invoking tcp_sendmsg_fastopen() in an
unblocking manner, and later eventually complete the blocking
__inet_stream_connect() as needed.

Fixes: d98a82a6afc7 ("mptcp: handle defer connect in mptcp_sendmsg")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v2 -> v3:
 - dropped FASTOPEN_CONNECT chunk (Matttbe)
 - comments fixlet (Matttbe)
 - rebased on top of (""mptcp: factor out mptcp_connect()")

v1 -> v2:
 - really invoke fastopen in unblocking manner
 - clear copied_syn on completion errors
---
 net/mptcp/protocol.c | 49 ++++++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1cdf9cd1a3cc..82d899233df4 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk)
 	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
 }
 
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
+				  size_t len, int *copied_syn)
+{
+	unsigned int saved_flags = msg->msg_flags;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	int ret;
+
+	lock_sock(ssk);
+	msg->msg_flags |= MSG_DONTWAIT;
+	msk->connect_flags = O_NONBLOCK;
+	msk->is_sendmsg = 1;
+	ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
+	msk->is_sendmsg = 0;
+	msg->msg_flags = saved_flags;
+	release_sock(ssk);
+
+	/* do the blocking bits of inet_stream_connect outside the ssk socket lock */
+	if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
+		ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+					    msg->msg_namelen, msg->msg_flags, 1);
+
+		/* Keep the same behaviout of plain TCP: zero the copied bytes in
+		 * case of any error, except timeout or signal
+		 */
+		if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
+			*copied_syn = 0;
+	}
+
+	return ret;
+}
+
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	ssock = __mptcp_nmpc_socket(msk);
 	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
-		struct sock *ssk = ssock->sk;
 		int copied_syn = 0;
 
-		lock_sock(ssk);
-
-		msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
-		msk->is_sendmsg = 1;
-		ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
-		msk->is_sendmsg = 0;
+		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
 		copied += copied_syn;
-		if (ret == -EINPROGRESS && copied_syn > 0) {
-			/* reflect the new state on the MPTCP socket */
-			inet_sk_state_store(sk, inet_sk_state_load(ssk));
-			release_sock(ssk);
+		if (ret == -EINPROGRESS && copied_syn > 0)
 			goto out;
-		} else if (ret) {
-			release_sock(ssk);
+		else if (ret)
 			goto do_error;
-		}
-		release_sock(ssk);
 	}
 
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-- 
2.37.3
Re: [PATCH mptcp-net v3] mptcp: fix abba deadlock on fastopen
Posted by Mat Martineau 1 year, 6 months ago
On Wed, 12 Oct 2022, Paolo Abeni wrote:

> Our CI reported lockdep splat in the fastopen code:
> ======================================================
> WARNING: possible circular locking dependency detected
> 6.0.0.mptcp_f5e8bfe9878d+ #1558 Not tainted
> ------------------------------------------------------
> packetdrill/1071 is trying to acquire lock:
> ffff8881bd198140 (sk_lock-AF_INET){+.+.}-{0:0}, at: inet_wait_for_connect+0x19c/0x310
>
> but task is already holding lock:
> ffff8881b8346540 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0xfdf/0x1740
>
> which lock already depends on the new lock.
>
> the existing dependency chain (in reverse order) is:
>
> -> #1 (k-sk_lock-AF_INET){+.+.}-{0:0}:
>        __lock_acquire+0xb6d/0x1860
>        lock_acquire+0x1d8/0x620
>        lock_sock_nested+0x37/0xd0
>        inet_stream_connect+0x3f/0xa0
>        mptcp_connect+0x411/0x800
>        __inet_stream_connect+0x3ab/0x800
>        mptcp_stream_connect+0xac/0x110
>        __sys_connect+0x101/0x130
>        __x64_sys_connect+0x6e/0xb0
>        do_syscall_64+0x59/0x90
>        entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> -> #0 (sk_lock-AF_INET){+.+.}-{0:0}:
>        check_prev_add+0x15e/0x2110
>        validate_chain+0xace/0xdf0
>        __lock_acquire+0xb6d/0x1860
>        lock_acquire+0x1d8/0x620
>        lock_sock_nested+0x37/0xd0
>        inet_wait_for_connect+0x19c/0x310
>        __inet_stream_connect+0x26c/0x800
>        tcp_sendmsg_fastopen+0x341/0x650
>        mptcp_sendmsg+0x109d/0x1740
>        sock_sendmsg+0xe1/0x120
>        __sys_sendto+0x1c7/0x2a0
>        __x64_sys_sendto+0xdc/0x1b0
>        do_syscall_64+0x59/0x90
>        entry_SYSCALL_64_after_hwframe+0x63/0xcd
>
> other info that might help us debug this:
>
>  Possible unsafe locking scenario:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(k-sk_lock-AF_INET);
>                                lock(sk_lock-AF_INET);
>                                lock(k-sk_lock-AF_INET);
>   lock(sk_lock-AF_INET);
>
>  *** DEADLOCK ***
>
> 1 lock held by packetdrill/1071:
>  #0: ffff8881b8346540 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0xfdf/0x1740
> ======================================================
>
> The problem is caused by the blocking inet_wait_for_connect() releasing
> and re-acquiring the msk socket lock while the subflow socket lock is
> still held and the MPTCP socket requires that the msk socket lock must
> be acquired before the subflow socket lock.
>
> Address the issue always invoking tcp_sendmsg_fastopen() in an
> unblocking manner, and later eventually complete the blocking
> __inet_stream_connect() as needed.
>
> Fixes: d98a82a6afc7 ("mptcp: handle defer connect in mptcp_sendmsg")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v2 -> v3:
> - dropped FASTOPEN_CONNECT chunk (Matttbe)
> - comments fixlet (Matttbe)
> - rebased on top of (""mptcp: factor out mptcp_connect()")
>

I don't know if Matthieu would like additional changes, but with the 
suggested change to https://github.com/multipath-tcp/packetdrill/pull/97 
this v3 patch is testing fine for me:

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>

> v1 -> v2:
> - really invoke fastopen in unblocking manner
> - clear copied_syn on completion errors
> ---
> net/mptcp/protocol.c | 49 ++++++++++++++++++++++++++++++--------------
> 1 file changed, 34 insertions(+), 15 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 1cdf9cd1a3cc..82d899233df4 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk)
> 	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
> }
>
> +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
> +				  size_t len, int *copied_syn)
> +{
> +	unsigned int saved_flags = msg->msg_flags;
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	int ret;
> +
> +	lock_sock(ssk);
> +	msg->msg_flags |= MSG_DONTWAIT;
> +	msk->connect_flags = O_NONBLOCK;
> +	msk->is_sendmsg = 1;
> +	ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
> +	msk->is_sendmsg = 0;
> +	msg->msg_flags = saved_flags;
> +	release_sock(ssk);
> +
> +	/* do the blocking bits of inet_stream_connect outside the ssk socket lock */
> +	if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
> +		ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
> +					    msg->msg_namelen, msg->msg_flags, 1);
> +
> +		/* Keep the same behaviout of plain TCP: zero the copied bytes in
> +		 * case of any error, except timeout or signal
> +		 */
> +		if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
> +			*copied_syn = 0;
> +	}
> +
> +	return ret;
> +}
> +
> static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>
> 	ssock = __mptcp_nmpc_socket(msk);
> 	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
> -		struct sock *ssk = ssock->sk;
> 		int copied_syn = 0;
>
> -		lock_sock(ssk);
> -
> -		msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
> -		msk->is_sendmsg = 1;
> -		ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
> -		msk->is_sendmsg = 0;
> +		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
> 		copied += copied_syn;
> -		if (ret == -EINPROGRESS && copied_syn > 0) {
> -			/* reflect the new state on the MPTCP socket */
> -			inet_sk_state_store(sk, inet_sk_state_load(ssk));
> -			release_sock(ssk);
> +		if (ret == -EINPROGRESS && copied_syn > 0)
> 			goto out;
> -		} else if (ret) {
> -			release_sock(ssk);
> +		else if (ret)
> 			goto do_error;
> -		}
> -		release_sock(ssk);
> 	}
>
> 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
> -- 
> 2.37.3
>
>
>

--
Mat Martineau
Intel
Re: [PATCH mptcp-net v3] mptcp: fix abba deadlock on fastopen
Posted by Matthieu Baerts 1 year, 6 months ago
Hi Paolo,

On 12/10/2022 16:48, Paolo Abeni wrote:
> Our CI reported lockdep splat in the fastopen code:

(...)

> The problem is caused by the blocking inet_wait_for_connect() releasing
> and re-acquiring the msk socket lock while the subflow socket lock is
> still held and the MPTCP socket requires that the msk socket lock must
> be acquired before the subflow socket lock.
> 
> Address the issue always invoking tcp_sendmsg_fastopen() in an
> unblocking manner, and later eventually complete the blocking
> __inet_stream_connect() as needed.

Thank you for the v3!

I just have one last question here below (+ 2 small comments but a v4 is
probably not needed):

(...)

> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 1cdf9cd1a3cc..82d899233df4 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk)
>  	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
>  }
>  
> +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
> +				  size_t len, int *copied_syn)
> +{
> +	unsigned int saved_flags = msg->msg_flags;
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	int ret;
> +
> +	lock_sock(ssk);
> +	msg->msg_flags |= MSG_DONTWAIT;
> +	msk->connect_flags = O_NONBLOCK;

Just to be sure I understand properly: here O_NONBLOCK is set but that
will only be used for the subflow and only if MSG_FASTOPEN is set (and
allowed), so not for TCP_FASTOPEN_CONNECT, right?

But no need to reset them later, right?

(I'm a bit confused by the fact we set msk->connect_flags in
mptcp_stream_connect() before calling __inet_stream_connect() and not
here below)

> +	msk->is_sendmsg = 1;
> +	ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
> +	msk->is_sendmsg = 0;
> +	msg->msg_flags = saved_flags;
> +	release_sock(ssk);
> +
> +	/* do the blocking bits of inet_stream_connect outside the ssk socket lock */
> +	if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
> +		ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
> +					    msg->msg_namelen, msg->msg_flags, 1);
> +
> +		/* Keep the same behaviout of plain TCP: zero the copied bytes in

s/behaviout/behaviour/ but I can fix that when applying the patch, I
guess "checkpatch.pl --codespell" will remind me that.

> +		 * case of any error, except timeout or signal
> +		 */
> +		if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
> +			*copied_syn = 0;
> +	}
> +
> +	return ret;
> +}
> +
>  static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>  {
>  	struct mptcp_sock *msk = mptcp_sk(sk);
> @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>  
>  	ssock = __mptcp_nmpc_socket(msk);
>  	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
> -		struct sock *ssk = ssock->sk;
>  		int copied_syn = 0;
>  
> -		lock_sock(ssk);
> -
> -		msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
> -		msk->is_sendmsg = 1;
> -		ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
> -		msk->is_sendmsg = 0;
> +		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);


It might look a bit strange to add some code here and then move it to a
dedicated function in the following commit but I'm sure that will be
fine for other net maintainers! :-)

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
Re: [PATCH mptcp-net v3] mptcp: fix abba deadlock on fastopen
Posted by Paolo Abeni 1 year, 6 months ago
On Wed, 2022-10-12 at 17:39 +0200, Matthieu Baerts wrote:
> Hi Paolo,
> 
> On 12/10/2022 16:48, Paolo Abeni wrote:
> > Our CI reported lockdep splat in the fastopen code:
> 
> (...)
> 
> > The problem is caused by the blocking inet_wait_for_connect() releasing
> > and re-acquiring the msk socket lock while the subflow socket lock is
> > still held and the MPTCP socket requires that the msk socket lock must
> > be acquired before the subflow socket lock.
> > 
> > Address the issue always invoking tcp_sendmsg_fastopen() in an
> > unblocking manner, and later eventually complete the blocking
> > __inet_stream_connect() as needed.
> 
> Thank you for the v3!
> 
> I just have one last question here below (+ 2 small comments but a v4 is
> probably not needed):
> 
> (...)
> 
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 1cdf9cd1a3cc..82d899233df4 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk)
> >  	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
> >  }
> >  
> > +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
> > +				  size_t len, int *copied_syn)
> > +{
> > +	unsigned int saved_flags = msg->msg_flags;
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	int ret;
> > +
> > +	lock_sock(ssk);
> > +	msg->msg_flags |= MSG_DONTWAIT;
> > +	msk->connect_flags = O_NONBLOCK;
> 
> Just to be sure I understand properly: here O_NONBLOCK is set but that
> will only be used for the subflow and only if MSG_FASTOPEN is set (and
> allowed), so not for TCP_FASTOPEN_CONNECT, right?
> 
> But no need to reset them later, right?
> 
> (I'm a bit confused by the fact we set msk->connect_flags in
> mptcp_stream_connect() before calling __inet_stream_connect() and not
> here below)

You are right, that is actually not obvious. The later
__inet_stream_connect() call will not reach mptcp_stream_connect(),
will perform only the inet_wait_for_connect() bits, so the
connect_flags will be unused.
> 
> > +	msk->is_sendmsg = 1;
> > +	ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
> > +	msk->is_sendmsg = 0;
> > +	msg->msg_flags = saved_flags;
> > +	release_sock(ssk);
> > +
> > +	/* do the blocking bits of inet_stream_connect outside the ssk socket lock */
> > +	if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
> > +		ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
> > +					    msg->msg_namelen, msg->msg_flags, 1);
> > +
> > +		/* Keep the same behaviout of plain TCP: zero the copied bytes in
> 
> s/behaviout/behaviour/ but I can fix that when applying the patch, I
> guess "checkpatch.pl --codespell" will remind me that.
> 
> > +		 * case of any error, except timeout or signal
> > +		 */
> > +		if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
> > +			*copied_syn = 0;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> >  static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> >  {
> >  	struct mptcp_sock *msk = mptcp_sk(sk);
> > @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> >  
> >  	ssock = __mptcp_nmpc_socket(msk);
> >  	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
> > -		struct sock *ssk = ssock->sk;
> >  		int copied_syn = 0;
> >  
> > -		lock_sock(ssk);
> > -
> > -		msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
> > -		msk->is_sendmsg = 1;
> > -		ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
> > -		msk->is_sendmsg = 0;
> > +		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
> 
> 
> It might look a bit strange to add some code here and then move it to a
> dedicated function in the following commit but I'm sure that will be
> fine for other net maintainers! :-)

If you prefer I can rework the 'factor out' patch to make this change
more straight-forward.

Cheers,

Paolo
Re: [PATCH mptcp-net v3] mptcp: fix abba deadlock on fastopen
Posted by Matthieu Baerts 1 year, 6 months ago
Hi Paolo, Mat,

On 13/10/2022 14:19, Paolo Abeni wrote:
> On Wed, 2022-10-12 at 17:39 +0200, Matthieu Baerts wrote:
>> Hi Paolo,
>>
>> On 12/10/2022 16:48, Paolo Abeni wrote:
>>> Our CI reported lockdep splat in the fastopen code:
>>
>> (...)
>>
>>> The problem is caused by the blocking inet_wait_for_connect() releasing
>>> and re-acquiring the msk socket lock while the subflow socket lock is
>>> still held and the MPTCP socket requires that the msk socket lock must
>>> be acquired before the subflow socket lock.
>>>
>>> Address the issue always invoking tcp_sendmsg_fastopen() in an
>>> unblocking manner, and later eventually complete the blocking
>>> __inet_stream_connect() as needed.
>>
>> Thank you for the v3!
>>
>> I just have one last question here below (+ 2 small comments but a v4 is
>> probably not needed):
>>
>> (...)
>>
>>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>>> index 1cdf9cd1a3cc..82d899233df4 100644
>>> --- a/net/mptcp/protocol.c
>>> +++ b/net/mptcp/protocol.c
>>> @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk)
>>>  	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
>>>  }
>>>  
>>> +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
>>> +				  size_t len, int *copied_syn)
>>> +{
>>> +	unsigned int saved_flags = msg->msg_flags;
>>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>>> +	int ret;
>>> +
>>> +	lock_sock(ssk);
>>> +	msg->msg_flags |= MSG_DONTWAIT;
>>> +	msk->connect_flags = O_NONBLOCK;
>>
>> Just to be sure I understand properly: here O_NONBLOCK is set but that
>> will only be used for the subflow and only if MSG_FASTOPEN is set (and
>> allowed), so not for TCP_FASTOPEN_CONNECT, right?
>>
>> But no need to reset them later, right?
>>
>> (I'm a bit confused by the fact we set msk->connect_flags in
>> mptcp_stream_connect() before calling __inet_stream_connect() and not
>> here below)
> 
> You are right, that is actually not obvious. The later
> __inet_stream_connect() call will not reach mptcp_stream_connect(),
> will perform only the inet_wait_for_connect() bits, so the
> connect_flags will be unused.

Thank you for the explanation, that's clearer now!

>>
>>> +	msk->is_sendmsg = 1;
>>> +	ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
>>> +	msk->is_sendmsg = 0;
>>> +	msg->msg_flags = saved_flags;
>>> +	release_sock(ssk);
>>> +
>>> +	/* do the blocking bits of inet_stream_connect outside the ssk socket lock */
>>> +	if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
>>> +		ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
>>> +					    msg->msg_namelen, msg->msg_flags, 1);
>>> +
>>> +		/* Keep the same behaviout of plain TCP: zero the copied bytes in
>>
>> s/behaviout/behaviour/ but I can fix that when applying the patch, I
>> guess "checkpatch.pl --codespell" will remind me that.

(done)

>>> +		 * case of any error, except timeout or signal
>>> +		 */
>>> +		if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
>>> +			*copied_syn = 0;
>>> +	}
>>> +
>>> +	return ret;
>>> +}
>>> +
>>>  static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>>>  {
>>>  	struct mptcp_sock *msk = mptcp_sk(sk);
>>> @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>>>  
>>>  	ssock = __mptcp_nmpc_socket(msk);
>>>  	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
>>> -		struct sock *ssk = ssock->sk;
>>>  		int copied_syn = 0;
>>>  
>>> -		lock_sock(ssk);
>>> -
>>> -		msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
>>> -		msk->is_sendmsg = 1;
>>> -		ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
>>> -		msk->is_sendmsg = 0;
>>> +		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
>>
>>
>> It might look a bit strange to add some code here and then move it to a
>> dedicated function in the following commit but I'm sure that will be
>> fine for other net maintainers! :-)
> 
> If you prefer I can rework the 'factor out' patch to make this change
> more straight-forward.

No, all good, it should not cause any issues.

The current v3 looks good to me as well so I just applied with Mat's RvB
tag:

New patches for t/upstream-net:
- db11c1e73050: mptcp: factor out mptcp_connect()
- 8f2675510042: mptcp: fix abba deadlock on fastopen
- Results: 1cb20c1f7d2a..971a69ed25f3 (export-net)

New patches for t/upstream:
- db11c1e73050: mptcp: factor out mptcp_connect()
- 8f2675510042: mptcp: fix abba deadlock on fastopen
- Results: 7769807fea85..cdd78b7e0a98 (export)


Tests are now in progress:

https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export-net/20221014T131605
https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20221014T131605

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
Re: [PATCH mptcp-net v3] mptcp: fix abba deadlock on fastopen
Posted by Mat Martineau 1 year, 6 months ago
On Wed, 12 Oct 2022, Matthieu Baerts wrote:

> Hi Paolo,
>
> On 12/10/2022 16:48, Paolo Abeni wrote:
>> Our CI reported lockdep splat in the fastopen code:
>
> (...)
>
>> The problem is caused by the blocking inet_wait_for_connect() releasing
>> and re-acquiring the msk socket lock while the subflow socket lock is
>> still held and the MPTCP socket requires that the msk socket lock must
>> be acquired before the subflow socket lock.
>>
>> Address the issue always invoking tcp_sendmsg_fastopen() in an
>> unblocking manner, and later eventually complete the blocking
>> __inet_stream_connect() as needed.

Matthieu and Paolo -

When I tested with export-net (and cherry picked the "factor out 
mptcp_connect()" commit, I saw packetdrill failures that would indicate 
it's not blocking as expected:

FAIL [/home/mjmartin/work/packetdrill/gtests/net/mptcp/fastopen/client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt (ipv4)]
stdout:
stderr:
client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt:13: timing error: expected system call return at 0.400840 sec but happened at 0.002812 sec; tolerance 0.100000 sec
FAIL [/home/mjmartin/work/packetdrill/gtests/net/mptcp/fastopen/client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt (ipv6)]
stdout:
stderr:
client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt:13: timing error: expected system call return at 0.400827 sec but happened at 0.002901 sec; tolerance 0.100000 sec

...

FAIL 
[/home/mjmartin/work/packetdrill/gtests/net/mptcp/fastopen/client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt (ipv4-mapped-v6)]
stdout:
stderr:
client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt:13: timing error: expected system call return at 0.400735 sec but happened at 0.002602 sec; tolerance 0.100000 sec


(There were also FASTOPEN_NO_COOKIE failures because those commits aren't 
on export-net.)


Matthieu, did you see different behavior? Looking at timestamps you were 
likely doing packetdrill tests with v2 of this :)


>
> Thank you for the v3!
>
> I just have one last question here below (+ 2 small comments but a v4 is
> probably not needed):
>
> (...)
>
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index 1cdf9cd1a3cc..82d899233df4 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk)
>>  	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
>>  }
>>
>> +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
>> +				  size_t len, int *copied_syn)
>> +{
>> +	unsigned int saved_flags = msg->msg_flags;
>> +	struct mptcp_sock *msk = mptcp_sk(sk);
>> +	int ret;
>> +
>> +	lock_sock(ssk);
>> +	msg->msg_flags |= MSG_DONTWAIT;
>> +	msk->connect_flags = O_NONBLOCK;
>
> Just to be sure I understand properly: here O_NONBLOCK is set but that
> will only be used for the subflow and only if MSG_FASTOPEN is set (and
> allowed), so not for TCP_FASTOPEN_CONNECT, right?
>
> But no need to reset them later, right?
>
> (I'm a bit confused by the fact we set msk->connect_flags in
> mptcp_stream_connect() before calling __inet_stream_connect() and not
> here below)
>

msk->connect_flags is set above? But I wonder if the packetdrill problem 
above is happening because msk->connect_flags should be updated again 
before the __inet_stream_connect() call.


>> +	msk->is_sendmsg = 1;
>> +	ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
>> +	msk->is_sendmsg = 0;
>> +	msg->msg_flags = saved_flags;
>> +	release_sock(ssk);
>> +
>> +	/* do the blocking bits of inet_stream_connect outside the ssk socket lock */
>> +	if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
>> +		ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
>> +					    msg->msg_namelen, msg->msg_flags, 1);
>> +
>> +		/* Keep the same behaviout of plain TCP: zero the copied bytes in
>
> s/behaviout/behaviour/ but I can fix that when applying the patch, I
> guess "checkpatch.pl --codespell" will remind me that.
>
>> +		 * case of any error, except timeout or signal
>> +		 */
>> +		if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
>> +			*copied_syn = 0;
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>>  static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>>  {
>>  	struct mptcp_sock *msk = mptcp_sk(sk);
>> @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>>
>>  	ssock = __mptcp_nmpc_socket(msk);
>>  	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
>> -		struct sock *ssk = ssock->sk;
>>  		int copied_syn = 0;
>>
>> -		lock_sock(ssk);
>> -
>> -		msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
>> -		msk->is_sendmsg = 1;
>> -		ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
>> -		msk->is_sendmsg = 0;
>> +		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
>
>
> It might look a bit strange to add some code here and then move it to a
> dedicated function in the following commit but I'm sure that will be
> fine for other net maintainers! :-)
>

The other changes in "mptcp: factor out mptcp_connect()" are big enough 
that I think it makes sense to have two separate commits!



--
Mat Martineau
Intel