net/mptcp/protocol.c | 49 ++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 15 deletions(-)
Our CI reported lockdep splat in the fastopen code:
======================================================
WARNING: possible circular locking dependency detected
6.0.0.mptcp_f5e8bfe9878d+ #1558 Not tainted
------------------------------------------------------
packetdrill/1071 is trying to acquire lock:
ffff8881bd198140 (sk_lock-AF_INET){+.+.}-{0:0}, at: inet_wait_for_connect+0x19c/0x310
but task is already holding lock:
ffff8881b8346540 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0xfdf/0x1740
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (k-sk_lock-AF_INET){+.+.}-{0:0}:
__lock_acquire+0xb6d/0x1860
lock_acquire+0x1d8/0x620
lock_sock_nested+0x37/0xd0
inet_stream_connect+0x3f/0xa0
mptcp_connect+0x411/0x800
__inet_stream_connect+0x3ab/0x800
mptcp_stream_connect+0xac/0x110
__sys_connect+0x101/0x130
__x64_sys_connect+0x6e/0xb0
do_syscall_64+0x59/0x90
entry_SYSCALL_64_after_hwframe+0x63/0xcd
-> #0 (sk_lock-AF_INET){+.+.}-{0:0}:
check_prev_add+0x15e/0x2110
validate_chain+0xace/0xdf0
__lock_acquire+0xb6d/0x1860
lock_acquire+0x1d8/0x620
lock_sock_nested+0x37/0xd0
inet_wait_for_connect+0x19c/0x310
__inet_stream_connect+0x26c/0x800
tcp_sendmsg_fastopen+0x341/0x650
mptcp_sendmsg+0x109d/0x1740
sock_sendmsg+0xe1/0x120
__sys_sendto+0x1c7/0x2a0
__x64_sys_sendto+0xdc/0x1b0
do_syscall_64+0x59/0x90
entry_SYSCALL_64_after_hwframe+0x63/0xcd
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(k-sk_lock-AF_INET);
lock(sk_lock-AF_INET);
lock(k-sk_lock-AF_INET);
lock(sk_lock-AF_INET);
*** DEADLOCK ***
1 lock held by packetdrill/1071:
#0: ffff8881b8346540 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0xfdf/0x1740
======================================================
The problem is caused by the blocking inet_wait_for_connect() releasing
and re-acquiring the msk socket lock while the subflow socket lock is
still held and the MPTCP socket requires that the msk socket lock must
be acquired before the subflow socket lock.
Address the issue always invoking tcp_sendmsg_fastopen() in an
unblocking manner, and later eventually complete the blocking
__inet_stream_connect() as needed.
Fixes: d98a82a6afc7 ("mptcp: handle defer connect in mptcp_sendmsg")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v2 -> v3:
- dropped FASTOPEN_CONNECT chunk (Matttbe)
- comments fixlet (Matttbe)
- rebased on top of (""mptcp: factor out mptcp_connect()")
v1 -> v2:
- really invoke fastopen in unblocking manner
- clear copied_syn on completion errors
---
net/mptcp/protocol.c | 49 ++++++++++++++++++++++++++++++--------------
1 file changed, 34 insertions(+), 15 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1cdf9cd1a3cc..82d899233df4 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk)
set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
}
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
+ size_t len, int *copied_syn)
+{
+ unsigned int saved_flags = msg->msg_flags;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int ret;
+
+ lock_sock(ssk);
+ msg->msg_flags |= MSG_DONTWAIT;
+ msk->connect_flags = O_NONBLOCK;
+ msk->is_sendmsg = 1;
+ ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
+ msk->is_sendmsg = 0;
+ msg->msg_flags = saved_flags;
+ release_sock(ssk);
+
+ /* do the blocking bits of inet_stream_connect outside the ssk socket lock */
+ if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
+ ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+ msg->msg_namelen, msg->msg_flags, 1);
+
+ /* Keep the same behaviout of plain TCP: zero the copied bytes in
+ * case of any error, except timeout or signal
+ */
+ if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
+ *copied_syn = 0;
+ }
+
+ return ret;
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ssock = __mptcp_nmpc_socket(msk);
if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
- struct sock *ssk = ssock->sk;
int copied_syn = 0;
- lock_sock(ssk);
-
- msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
- msk->is_sendmsg = 1;
- ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
- msk->is_sendmsg = 0;
+ ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
copied += copied_syn;
- if (ret == -EINPROGRESS && copied_syn > 0) {
- /* reflect the new state on the MPTCP socket */
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
- release_sock(ssk);
+ if (ret == -EINPROGRESS && copied_syn > 0)
goto out;
- } else if (ret) {
- release_sock(ssk);
+ else if (ret)
goto do_error;
- }
- release_sock(ssk);
}
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
--
2.37.3
On Wed, 12 Oct 2022, Paolo Abeni wrote: > Our CI reported lockdep splat in the fastopen code: > ====================================================== > WARNING: possible circular locking dependency detected > 6.0.0.mptcp_f5e8bfe9878d+ #1558 Not tainted > ------------------------------------------------------ > packetdrill/1071 is trying to acquire lock: > ffff8881bd198140 (sk_lock-AF_INET){+.+.}-{0:0}, at: inet_wait_for_connect+0x19c/0x310 > > but task is already holding lock: > ffff8881b8346540 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0xfdf/0x1740 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #1 (k-sk_lock-AF_INET){+.+.}-{0:0}: > __lock_acquire+0xb6d/0x1860 > lock_acquire+0x1d8/0x620 > lock_sock_nested+0x37/0xd0 > inet_stream_connect+0x3f/0xa0 > mptcp_connect+0x411/0x800 > __inet_stream_connect+0x3ab/0x800 > mptcp_stream_connect+0xac/0x110 > __sys_connect+0x101/0x130 > __x64_sys_connect+0x6e/0xb0 > do_syscall_64+0x59/0x90 > entry_SYSCALL_64_after_hwframe+0x63/0xcd > > -> #0 (sk_lock-AF_INET){+.+.}-{0:0}: > check_prev_add+0x15e/0x2110 > validate_chain+0xace/0xdf0 > __lock_acquire+0xb6d/0x1860 > lock_acquire+0x1d8/0x620 > lock_sock_nested+0x37/0xd0 > inet_wait_for_connect+0x19c/0x310 > __inet_stream_connect+0x26c/0x800 > tcp_sendmsg_fastopen+0x341/0x650 > mptcp_sendmsg+0x109d/0x1740 > sock_sendmsg+0xe1/0x120 > __sys_sendto+0x1c7/0x2a0 > __x64_sys_sendto+0xdc/0x1b0 > do_syscall_64+0x59/0x90 > entry_SYSCALL_64_after_hwframe+0x63/0xcd > > other info that might help us debug this: > > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(k-sk_lock-AF_INET); > lock(sk_lock-AF_INET); > lock(k-sk_lock-AF_INET); > lock(sk_lock-AF_INET); > > *** DEADLOCK *** > > 1 lock held by packetdrill/1071: > #0: ffff8881b8346540 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0xfdf/0x1740 > ====================================================== > > The problem is caused by the blocking inet_wait_for_connect() releasing > and re-acquiring the msk socket lock while the subflow socket lock is > still held and the MPTCP socket requires that the msk socket lock must > be acquired before the subflow socket lock. > > Address the issue always invoking tcp_sendmsg_fastopen() in an > unblocking manner, and later eventually complete the blocking > __inet_stream_connect() as needed. > > Fixes: d98a82a6afc7 ("mptcp: handle defer connect in mptcp_sendmsg") > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > --- > v2 -> v3: > - dropped FASTOPEN_CONNECT chunk (Matttbe) > - comments fixlet (Matttbe) > - rebased on top of (""mptcp: factor out mptcp_connect()") > I don't know if Matthieu would like additional changes, but with the suggested change to https://github.com/multipath-tcp/packetdrill/pull/97 this v3 patch is testing fine for me: Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com> > v1 -> v2: > - really invoke fastopen in unblocking manner > - clear copied_syn on completion errors > --- > net/mptcp/protocol.c | 49 ++++++++++++++++++++++++++++++-------------- > 1 file changed, 34 insertions(+), 15 deletions(-) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 1cdf9cd1a3cc..82d899233df4 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk) > set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); > } > > +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, > + size_t len, int *copied_syn) > +{ > + unsigned int saved_flags = msg->msg_flags; > + struct mptcp_sock *msk = mptcp_sk(sk); > + int ret; > + > + lock_sock(ssk); > + msg->msg_flags |= MSG_DONTWAIT; > + msk->connect_flags = O_NONBLOCK; > + msk->is_sendmsg = 1; > + ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); > + msk->is_sendmsg = 0; > + msg->msg_flags = saved_flags; > + release_sock(ssk); > + > + /* do the blocking bits of inet_stream_connect outside the ssk socket lock */ > + if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) { > + ret = __inet_stream_connect(sk->sk_socket, msg->msg_name, > + msg->msg_namelen, msg->msg_flags, 1); > + > + /* Keep the same behaviout of plain TCP: zero the copied bytes in > + * case of any error, except timeout or signal > + */ > + if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) > + *copied_syn = 0; > + } > + > + return ret; > +} > + > static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > { > struct mptcp_sock *msk = mptcp_sk(sk); > @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > ssock = __mptcp_nmpc_socket(msk); > if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { > - struct sock *ssk = ssock->sk; > int copied_syn = 0; > > - lock_sock(ssk); > - > - msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; > - msk->is_sendmsg = 1; > - ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL); > - msk->is_sendmsg = 0; > + ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); > copied += copied_syn; > - if (ret == -EINPROGRESS && copied_syn > 0) { > - /* reflect the new state on the MPTCP socket */ > - inet_sk_state_store(sk, inet_sk_state_load(ssk)); > - release_sock(ssk); > + if (ret == -EINPROGRESS && copied_syn > 0) > goto out; > - } else if (ret) { > - release_sock(ssk); > + else if (ret) > goto do_error; > - } > - release_sock(ssk); > } > > timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); > -- > 2.37.3 > > > -- Mat Martineau Intel
Hi Paolo, On 12/10/2022 16:48, Paolo Abeni wrote: > Our CI reported lockdep splat in the fastopen code: (...) > The problem is caused by the blocking inet_wait_for_connect() releasing > and re-acquiring the msk socket lock while the subflow socket lock is > still held and the MPTCP socket requires that the msk socket lock must > be acquired before the subflow socket lock. > > Address the issue always invoking tcp_sendmsg_fastopen() in an > unblocking manner, and later eventually complete the blocking > __inet_stream_connect() as needed. Thank you for the v3! I just have one last question here below (+ 2 small comments but a v4 is probably not needed): (...) > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 1cdf9cd1a3cc..82d899233df4 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk) > set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); > } > > +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, > + size_t len, int *copied_syn) > +{ > + unsigned int saved_flags = msg->msg_flags; > + struct mptcp_sock *msk = mptcp_sk(sk); > + int ret; > + > + lock_sock(ssk); > + msg->msg_flags |= MSG_DONTWAIT; > + msk->connect_flags = O_NONBLOCK; Just to be sure I understand properly: here O_NONBLOCK is set but that will only be used for the subflow and only if MSG_FASTOPEN is set (and allowed), so not for TCP_FASTOPEN_CONNECT, right? But no need to reset them later, right? (I'm a bit confused by the fact we set msk->connect_flags in mptcp_stream_connect() before calling __inet_stream_connect() and not here below) > + msk->is_sendmsg = 1; > + ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); > + msk->is_sendmsg = 0; > + msg->msg_flags = saved_flags; > + release_sock(ssk); > + > + /* do the blocking bits of inet_stream_connect outside the ssk socket lock */ > + if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) { > + ret = __inet_stream_connect(sk->sk_socket, msg->msg_name, > + msg->msg_namelen, msg->msg_flags, 1); > + > + /* Keep the same behaviout of plain TCP: zero the copied bytes in s/behaviout/behaviour/ but I can fix that when applying the patch, I guess "checkpatch.pl --codespell" will remind me that. > + * case of any error, except timeout or signal > + */ > + if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) > + *copied_syn = 0; > + } > + > + return ret; > +} > + > static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > { > struct mptcp_sock *msk = mptcp_sk(sk); > @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > ssock = __mptcp_nmpc_socket(msk); > if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { > - struct sock *ssk = ssock->sk; > int copied_syn = 0; > > - lock_sock(ssk); > - > - msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; > - msk->is_sendmsg = 1; > - ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL); > - msk->is_sendmsg = 0; > + ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); It might look a bit strange to add some code here and then move it to a dedicated function in the following commit but I'm sure that will be fine for other net maintainers! :-) Cheers, Matt -- Tessares | Belgium | Hybrid Access Solutions www.tessares.net
On Wed, 2022-10-12 at 17:39 +0200, Matthieu Baerts wrote: > Hi Paolo, > > On 12/10/2022 16:48, Paolo Abeni wrote: > > Our CI reported lockdep splat in the fastopen code: > > (...) > > > The problem is caused by the blocking inet_wait_for_connect() releasing > > and re-acquiring the msk socket lock while the subflow socket lock is > > still held and the MPTCP socket requires that the msk socket lock must > > be acquired before the subflow socket lock. > > > > Address the issue always invoking tcp_sendmsg_fastopen() in an > > unblocking manner, and later eventually complete the blocking > > __inet_stream_connect() as needed. > > Thank you for the v3! > > I just have one last question here below (+ 2 small comments but a v4 is > probably not needed): > > (...) > > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > > index 1cdf9cd1a3cc..82d899233df4 100644 > > --- a/net/mptcp/protocol.c > > +++ b/net/mptcp/protocol.c > > @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk) > > set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); > > } > > > > +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, > > + size_t len, int *copied_syn) > > +{ > > + unsigned int saved_flags = msg->msg_flags; > > + struct mptcp_sock *msk = mptcp_sk(sk); > > + int ret; > > + > > + lock_sock(ssk); > > + msg->msg_flags |= MSG_DONTWAIT; > > + msk->connect_flags = O_NONBLOCK; > > Just to be sure I understand properly: here O_NONBLOCK is set but that > will only be used for the subflow and only if MSG_FASTOPEN is set (and > allowed), so not for TCP_FASTOPEN_CONNECT, right? > > But no need to reset them later, right? > > (I'm a bit confused by the fact we set msk->connect_flags in > mptcp_stream_connect() before calling __inet_stream_connect() and not > here below) You are right, that is actually not obvious. The later __inet_stream_connect() call will not reach mptcp_stream_connect(), will perform only the inet_wait_for_connect() bits, so the connect_flags will be unused. > > > + msk->is_sendmsg = 1; > > + ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); > > + msk->is_sendmsg = 0; > > + msg->msg_flags = saved_flags; > > + release_sock(ssk); > > + > > + /* do the blocking bits of inet_stream_connect outside the ssk socket lock */ > > + if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) { > > + ret = __inet_stream_connect(sk->sk_socket, msg->msg_name, > > + msg->msg_namelen, msg->msg_flags, 1); > > + > > + /* Keep the same behaviout of plain TCP: zero the copied bytes in > > s/behaviout/behaviour/ but I can fix that when applying the patch, I > guess "checkpatch.pl --codespell" will remind me that. > > > + * case of any error, except timeout or signal > > + */ > > + if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) > > + *copied_syn = 0; > > + } > > + > > + return ret; > > +} > > + > > static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > { > > struct mptcp_sock *msk = mptcp_sk(sk); > > @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > > > ssock = __mptcp_nmpc_socket(msk); > > if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { > > - struct sock *ssk = ssock->sk; > > int copied_syn = 0; > > > > - lock_sock(ssk); > > - > > - msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; > > - msk->is_sendmsg = 1; > > - ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL); > > - msk->is_sendmsg = 0; > > + ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); > > > It might look a bit strange to add some code here and then move it to a > dedicated function in the following commit but I'm sure that will be > fine for other net maintainers! :-) If you prefer I can rework the 'factor out' patch to make this change more straight-forward. Cheers, Paolo
Hi Paolo, Mat, On 13/10/2022 14:19, Paolo Abeni wrote: > On Wed, 2022-10-12 at 17:39 +0200, Matthieu Baerts wrote: >> Hi Paolo, >> >> On 12/10/2022 16:48, Paolo Abeni wrote: >>> Our CI reported lockdep splat in the fastopen code: >> >> (...) >> >>> The problem is caused by the blocking inet_wait_for_connect() releasing >>> and re-acquiring the msk socket lock while the subflow socket lock is >>> still held and the MPTCP socket requires that the msk socket lock must >>> be acquired before the subflow socket lock. >>> >>> Address the issue always invoking tcp_sendmsg_fastopen() in an >>> unblocking manner, and later eventually complete the blocking >>> __inet_stream_connect() as needed. >> >> Thank you for the v3! >> >> I just have one last question here below (+ 2 small comments but a v4 is >> probably not needed): >> >> (...) >> >>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c >>> index 1cdf9cd1a3cc..82d899233df4 100644 >>> --- a/net/mptcp/protocol.c >>> +++ b/net/mptcp/protocol.c >>> @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk) >>> set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); >>> } >>> >>> +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, >>> + size_t len, int *copied_syn) >>> +{ >>> + unsigned int saved_flags = msg->msg_flags; >>> + struct mptcp_sock *msk = mptcp_sk(sk); >>> + int ret; >>> + >>> + lock_sock(ssk); >>> + msg->msg_flags |= MSG_DONTWAIT; >>> + msk->connect_flags = O_NONBLOCK; >> >> Just to be sure I understand properly: here O_NONBLOCK is set but that >> will only be used for the subflow and only if MSG_FASTOPEN is set (and >> allowed), so not for TCP_FASTOPEN_CONNECT, right? >> >> But no need to reset them later, right? >> >> (I'm a bit confused by the fact we set msk->connect_flags in >> mptcp_stream_connect() before calling __inet_stream_connect() and not >> here below) > > You are right, that is actually not obvious. The later > __inet_stream_connect() call will not reach mptcp_stream_connect(), > will perform only the inet_wait_for_connect() bits, so the > connect_flags will be unused. Thank you for the explanation, that's clearer now! >> >>> + msk->is_sendmsg = 1; >>> + ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); >>> + msk->is_sendmsg = 0; >>> + msg->msg_flags = saved_flags; >>> + release_sock(ssk); >>> + >>> + /* do the blocking bits of inet_stream_connect outside the ssk socket lock */ >>> + if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) { >>> + ret = __inet_stream_connect(sk->sk_socket, msg->msg_name, >>> + msg->msg_namelen, msg->msg_flags, 1); >>> + >>> + /* Keep the same behaviout of plain TCP: zero the copied bytes in >> >> s/behaviout/behaviour/ but I can fix that when applying the patch, I >> guess "checkpatch.pl --codespell" will remind me that. (done) >>> + * case of any error, except timeout or signal >>> + */ >>> + if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) >>> + *copied_syn = 0; >>> + } >>> + >>> + return ret; >>> +} >>> + >>> static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) >>> { >>> struct mptcp_sock *msk = mptcp_sk(sk); >>> @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) >>> >>> ssock = __mptcp_nmpc_socket(msk); >>> if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { >>> - struct sock *ssk = ssock->sk; >>> int copied_syn = 0; >>> >>> - lock_sock(ssk); >>> - >>> - msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; >>> - msk->is_sendmsg = 1; >>> - ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL); >>> - msk->is_sendmsg = 0; >>> + ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); >> >> >> It might look a bit strange to add some code here and then move it to a >> dedicated function in the following commit but I'm sure that will be >> fine for other net maintainers! :-) > > If you prefer I can rework the 'factor out' patch to make this change > more straight-forward. No, all good, it should not cause any issues. The current v3 looks good to me as well so I just applied with Mat's RvB tag: New patches for t/upstream-net: - db11c1e73050: mptcp: factor out mptcp_connect() - 8f2675510042: mptcp: fix abba deadlock on fastopen - Results: 1cb20c1f7d2a..971a69ed25f3 (export-net) New patches for t/upstream: - db11c1e73050: mptcp: factor out mptcp_connect() - 8f2675510042: mptcp: fix abba deadlock on fastopen - Results: 7769807fea85..cdd78b7e0a98 (export) Tests are now in progress: https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export-net/20221014T131605 https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20221014T131605 Cheers, Matt -- Tessares | Belgium | Hybrid Access Solutions www.tessares.net
On Wed, 12 Oct 2022, Matthieu Baerts wrote: > Hi Paolo, > > On 12/10/2022 16:48, Paolo Abeni wrote: >> Our CI reported lockdep splat in the fastopen code: > > (...) > >> The problem is caused by the blocking inet_wait_for_connect() releasing >> and re-acquiring the msk socket lock while the subflow socket lock is >> still held and the MPTCP socket requires that the msk socket lock must >> be acquired before the subflow socket lock. >> >> Address the issue always invoking tcp_sendmsg_fastopen() in an >> unblocking manner, and later eventually complete the blocking >> __inet_stream_connect() as needed. Matthieu and Paolo - When I tested with export-net (and cherry picked the "factor out mptcp_connect()" commit, I saw packetdrill failures that would indicate it's not blocking as expected: FAIL [/home/mjmartin/work/packetdrill/gtests/net/mptcp/fastopen/client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt (ipv4)] stdout: stderr: client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt:13: timing error: expected system call return at 0.400840 sec but happened at 0.002812 sec; tolerance 0.100000 sec FAIL [/home/mjmartin/work/packetdrill/gtests/net/mptcp/fastopen/client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt (ipv6)] stdout: stderr: client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt:13: timing error: expected system call return at 0.400827 sec but happened at 0.002901 sec; tolerance 0.100000 sec ... FAIL [/home/mjmartin/work/packetdrill/gtests/net/mptcp/fastopen/client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt (ipv4-mapped-v6)] stdout: stderr: client-TCP_FASTOPEN_CONNECT-blocking-sendmsg.pkt:13: timing error: expected system call return at 0.400735 sec but happened at 0.002602 sec; tolerance 0.100000 sec (There were also FASTOPEN_NO_COOKIE failures because those commits aren't on export-net.) Matthieu, did you see different behavior? Looking at timestamps you were likely doing packetdrill tests with v2 of this :) > > Thank you for the v3! > > I just have one last question here below (+ 2 small comments but a v4 is > probably not needed): > > (...) > >> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c >> index 1cdf9cd1a3cc..82d899233df4 100644 >> --- a/net/mptcp/protocol.c >> +++ b/net/mptcp/protocol.c >> @@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk) >> set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); >> } >> >> +static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, >> + size_t len, int *copied_syn) >> +{ >> + unsigned int saved_flags = msg->msg_flags; >> + struct mptcp_sock *msk = mptcp_sk(sk); >> + int ret; >> + >> + lock_sock(ssk); >> + msg->msg_flags |= MSG_DONTWAIT; >> + msk->connect_flags = O_NONBLOCK; > > Just to be sure I understand properly: here O_NONBLOCK is set but that > will only be used for the subflow and only if MSG_FASTOPEN is set (and > allowed), so not for TCP_FASTOPEN_CONNECT, right? > > But no need to reset them later, right? > > (I'm a bit confused by the fact we set msk->connect_flags in > mptcp_stream_connect() before calling __inet_stream_connect() and not > here below) > msk->connect_flags is set above? But I wonder if the packetdrill problem above is happening because msk->connect_flags should be updated again before the __inet_stream_connect() call. >> + msk->is_sendmsg = 1; >> + ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); >> + msk->is_sendmsg = 0; >> + msg->msg_flags = saved_flags; >> + release_sock(ssk); >> + >> + /* do the blocking bits of inet_stream_connect outside the ssk socket lock */ >> + if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) { >> + ret = __inet_stream_connect(sk->sk_socket, msg->msg_name, >> + msg->msg_namelen, msg->msg_flags, 1); >> + >> + /* Keep the same behaviout of plain TCP: zero the copied bytes in > > s/behaviout/behaviour/ but I can fix that when applying the patch, I > guess "checkpatch.pl --codespell" will remind me that. > >> + * case of any error, except timeout or signal >> + */ >> + if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) >> + *copied_syn = 0; >> + } >> + >> + return ret; >> +} >> + >> static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) >> { >> struct mptcp_sock *msk = mptcp_sk(sk); >> @@ -1693,26 +1724,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) >> >> ssock = __mptcp_nmpc_socket(msk); >> if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { >> - struct sock *ssk = ssock->sk; >> int copied_syn = 0; >> >> - lock_sock(ssk); >> - >> - msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; >> - msk->is_sendmsg = 1; >> - ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL); >> - msk->is_sendmsg = 0; >> + ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); > > > It might look a bit strange to add some code here and then move it to a > dedicated function in the following commit but I'm sure that will be > fine for other net maintainers! :-) > The other changes in "mptcp: factor out mptcp_connect()" are big enough that I think it makes sense to have two separate commits! -- Mat Martineau Intel
© 2016 - 2024 Red Hat, Inc.