net/mptcp/protocol.c | 136 ++++++++++++++++++++++---------------------
net/mptcp/protocol.h |   4 +-
2 files changed, 73 insertions(+), 67 deletions(-)
The current MPTCP connect implementation duplicates a bit of inet
code and does not use nor provide a struct proto->connect callback,
which in turn will not fit the upcoming fastopen implementation.
Refactor such implementation to use the common helper, moving the
MPTCP-specific bits into mptcp_connect(). Additionally, avoid an
indirect call to the subflow connect callback.
Note that the fastopen call-path invokes mptcp_connect() while already
holding the subflow socket lock. Explicitly keep track of such path
via a new MPTCP-level flag and handle the locking accordingly.
Additionally, track the connect flags in a new msk field to allow
propagating them to the subflow inet_stream_connect call.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v1 -> v2:
- properly update msk state on non-blocking connect
---
net/mptcp/protocol.c | 136 ++++++++++++++++++++++---------------------
net/mptcp/protocol.h | 4 +-
2 files changed, 73 insertions(+), 67 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8feb684408f7..1aa940928b4f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1689,7 +1689,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(ssk);
+ msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
+ msk->is_sendmsg = 1;
ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
+ msk->is_sendmsg = 0;
copied += copied_syn;
if (ret == -EINPROGRESS && copied_syn > 0) {
/* reflect the new state on the MPTCP socket */
@@ -3506,10 +3509,73 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return put_user(answ, (int __user *)arg);
}
+static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
+static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct socket *ssock;
+ int err = -EINVAL;
+
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock)
+ return -EINVAL;
+
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sk, TCP_SYN_SENT);
+ subflow = mptcp_subflow_ctx(ssock->sk);
+#ifdef CONFIG_TCP_MD5SIG
+ /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
+ * TCP option space.
+ */
+ if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
+ mptcp_subflow_early_fallback(msk, subflow);
+#endif
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
+ MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
+ mptcp_subflow_early_fallback(msk, subflow);
+ }
+ if (likely(!__mptcp_check_fallback(msk)))
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);
+
+ /* if reaching here via the fastopen/sendmsg path, the caller already
+ * acquired the subflow socket lock, too.
+ */
+ if (msk->is_sendmsg)
+ err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
+ else
+ err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
+ inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
+
+ /* on successful connect, the msk state will be moved to established by
+ * subflow_finish_connect()
+ */
+ if (unlikely(err && err != -EINPROGRESS)) {
+ inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ return err;
+ }
+
+ mptcp_copy_inaddrs(sk, ssock->sk);
+
+ /* unblocking connect, mptcp-level inet_stream_connect will error out
+ * without changing the socket state, update it here.
+ */
+ if (err == -EINPROGRESS)
+ sk->sk_socket->state = ssock->state;
+ return err;
+}
+
static struct proto mptcp_prot = {
.name = "MPTCP",
.owner = THIS_MODULE,
.init = mptcp_init_sock,
+ .connect = mptcp_connect,
.disconnect = mptcp_disconnect,
.close = mptcp_close,
.accept = mptcp_accept,
@@ -3561,78 +3627,16 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return err;
}
-static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
-}
-
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
- struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct mptcp_subflow_context *subflow;
- struct socket *ssock;
- int err = -EINVAL;
+ int ret;
lock_sock(sock->sk);
- if (uaddr) {
- if (addr_len < sizeof(uaddr->sa_family))
- goto unlock;
-
- if (uaddr->sa_family == AF_UNSPEC) {
- err = mptcp_disconnect(sock->sk, flags);
- sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
- goto unlock;
- }
- }
-
- if (sock->state != SS_UNCONNECTED && msk->subflow) {
- /* pending connection or invalid state, let existing subflow
- * cope with that
- */
- ssock = msk->subflow;
- goto do_connect;
- }
-
- ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
- goto unlock;
-
- mptcp_token_destroy(msk);
- inet_sk_state_store(sock->sk, TCP_SYN_SENT);
- subflow = mptcp_subflow_ctx(ssock->sk);
-#ifdef CONFIG_TCP_MD5SIG
- /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
- * TCP option space.
- */
- if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
- mptcp_subflow_early_fallback(msk, subflow);
-#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
- MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
- }
- if (likely(!__mptcp_check_fallback(msk)))
- MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
-
-do_connect:
- err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
- inet_sk(sock->sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
- sock->state = ssock->state;
-
- /* on successful connect, the msk state will be moved to established by
- * subflow_finish_connect()
- */
- if (!err || err == -EINPROGRESS)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
- else
- inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
-
-unlock:
+ mptcp_sk(sock->sk)->connect_flags = flags;
+ ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
release_sock(sock->sk);
- return err;
+ return ret;
}
static int mptcp_listen(struct socket *sock, int backlog)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 93c535440a5c..18f866b1afda 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -285,7 +285,9 @@ struct mptcp_sock {
u8 mpc_endpoint_id;
u8 recvmsg_inq:1,
cork:1,
- nodelay:1;
+ nodelay:1,
+ is_sendmsg:1;
+ int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
--
2.37.3
Hi Paolo, Mat,

On 04/10/2022 22:38, Paolo Abeni wrote:
> The current MPTCP connect implementation duplicates a bit of inet
> code and does not use nor provide a struct proto->connect callback,
> which in turn will not fit the upcoming fastopen implementation.
>
> Refactor such implementation to use the common helper, moving the
> MPTCP-specific bits into mptcp_connect(). Additionally, avoid an
> indirect call to the subflow connect callback.
>
> Note that the fastopen call-path invokes mptcp_connect() while already
> holding the subflow socket lock. Explicitly keep track of such path
> via a new MPTCP-level flag and handle the locking accordingly.
>
> Additionally, track the connect flags in a new msk field to allow
> propagating them to the subflow inet_stream_connect call.

Thank you for the patch and the review!

Now in our tree (feat. for net-next):

New patches for t/upstream:
- 04edaeadb0d1: mptcp: factor out mptcp_connect()
- Results: b9a6e8d36365..996a299c16ae (export)

Tests are now in progress:

https://cirrus-ci.com/github/multipath-tcp/mptcp_net-next/export/20221006T162737

Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Tue, 4 Oct 2022, Paolo Abeni wrote:
> The current MPTCP connect implementation duplicates a bit of inet
> code and does not use nor provide a struct proto->connect callback,
> which in turn will not fit the upcoming fastopen implementation.
>
> Refactor such implementation to use the common helper, moving the
> MPTCP-specific bits into mptcp_connect(). Additionally, avoid an
> indirect call to the subflow connect callback.
>
> Note that the fastopen call-path invokes mptcp_connect() while already
> holding the subflow socket lock. Explicitly keep track of such path
> via a new MPTCP-level flag and handle the locking accordingly.
>
> Additionally, track the connect flags in a new msk field to allow
> propagating them to the subflow inet_stream_connect call.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> v1 -> v2:
> - properly update msk state on unblocking connect
v2 looks good to me, I also tried it with your updated packetdrill tests
(https://github.com/multipath-tcp/packetdrill/pull/94).
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
> ---
> net/mptcp/protocol.c | 136 ++++++++++++++++++++++---------------------
> net/mptcp/protocol.h | 4 +-
> 2 files changed, 73 insertions(+), 67 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 8feb684408f7..1aa940928b4f 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1689,7 +1689,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>
> lock_sock(ssk);
>
> + msk->connect_flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
> + msk->is_sendmsg = 1;
> ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
> + msk->is_sendmsg = 0;
> copied += copied_syn;
> if (ret == -EINPROGRESS && copied_syn > 0) {
> /* reflect the new state on the MPTCP socket */
> @@ -3506,10 +3509,73 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
> return put_user(answ, (int __user *)arg);
> }
>
> +static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
> + struct mptcp_subflow_context *subflow)
> +{
> + subflow->request_mptcp = 0;
> + __mptcp_do_fallback(msk);
> +}
> +
> +static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
> +{
> + struct mptcp_subflow_context *subflow;
> + struct mptcp_sock *msk = mptcp_sk(sk);
> + struct socket *ssock;
> + int err = -EINVAL;
> +
> + ssock = __mptcp_nmpc_socket(msk);
> + if (!ssock)
> + return -EINVAL;
> +
> + mptcp_token_destroy(msk);
> + inet_sk_state_store(sk, TCP_SYN_SENT);
> + subflow = mptcp_subflow_ctx(ssock->sk);
> +#ifdef CONFIG_TCP_MD5SIG
> + /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
> + * TCP option space.
> + */
> + if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
> + mptcp_subflow_early_fallback(msk, subflow);
> +#endif
> + if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
> + MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
> + mptcp_subflow_early_fallback(msk, subflow);
> + }
> + if (likely(!__mptcp_check_fallback(msk)))
> + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);
> +
> + /* if reaching here via the fastopen/sendmsg path, the caller already
> + * acquired the subflow socket lock, too.
> + */
> + if (msk->is_sendmsg)
> + err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
> + else
> + err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
> + inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
> +
> + /* on successful connect, the msk state will be moved to established by
> + * subflow_finish_connect()
> + */
> + if (unlikely(err && err != -EINPROGRESS)) {
> + inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
> + return err;
> + }
> +
> + mptcp_copy_inaddrs(sk, ssock->sk);
> +
> + /* unblocking connect, mptcp-level inet_stream_connect will error out
> + * without changing the socket state, update it here.
> + */
> + if (err == -EINPROGRESS)
> + sk->sk_socket->state = ssock->state;
> + return err;
> +}
> +
> static struct proto mptcp_prot = {
> .name = "MPTCP",
> .owner = THIS_MODULE,
> .init = mptcp_init_sock,
> + .connect = mptcp_connect,
> .disconnect = mptcp_disconnect,
> .close = mptcp_close,
> .accept = mptcp_accept,
> @@ -3561,78 +3627,16 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> return err;
> }
>
> -static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
> - struct mptcp_subflow_context *subflow)
> -{
> - subflow->request_mptcp = 0;
> - __mptcp_do_fallback(msk);
> -}
> -
> static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> int addr_len, int flags)
> {
> - struct mptcp_sock *msk = mptcp_sk(sock->sk);
> - struct mptcp_subflow_context *subflow;
> - struct socket *ssock;
> - int err = -EINVAL;
> + int ret;
>
> lock_sock(sock->sk);
> - if (uaddr) {
> - if (addr_len < sizeof(uaddr->sa_family))
> - goto unlock;
> -
> - if (uaddr->sa_family == AF_UNSPEC) {
> - err = mptcp_disconnect(sock->sk, flags);
> - sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
> - goto unlock;
> - }
> - }
> -
> - if (sock->state != SS_UNCONNECTED && msk->subflow) {
> - /* pending connection or invalid state, let existing subflow
> - * cope with that
> - */
> - ssock = msk->subflow;
> - goto do_connect;
> - }
> -
> - ssock = __mptcp_nmpc_socket(msk);
> - if (!ssock)
> - goto unlock;
> -
> - mptcp_token_destroy(msk);
> - inet_sk_state_store(sock->sk, TCP_SYN_SENT);
> - subflow = mptcp_subflow_ctx(ssock->sk);
> -#ifdef CONFIG_TCP_MD5SIG
> - /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
> - * TCP option space.
> - */
> - if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
> - mptcp_subflow_early_fallback(msk, subflow);
> -#endif
> - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
> - MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
> - mptcp_subflow_early_fallback(msk, subflow);
> - }
> - if (likely(!__mptcp_check_fallback(msk)))
> - MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
> -
> -do_connect:
> - err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
> - inet_sk(sock->sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
> - sock->state = ssock->state;
> -
> - /* on successful connect, the msk state will be moved to established by
> - * subflow_finish_connect()
> - */
> - if (!err || err == -EINPROGRESS)
> - mptcp_copy_inaddrs(sock->sk, ssock->sk);
> - else
> - inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
> -
> -unlock:
> + mptcp_sk(sock->sk)->connect_flags = flags;
> + ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
> release_sock(sock->sk);
> - return err;
> + return ret;
> }
>
> static int mptcp_listen(struct socket *sock, int backlog)
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 93c535440a5c..18f866b1afda 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -285,7 +285,9 @@ struct mptcp_sock {
> u8 mpc_endpoint_id;
> u8 recvmsg_inq:1,
> cork:1,
> - nodelay:1;
> + nodelay:1,
> + is_sendmsg:1;
> + int connect_flags;
> struct work_struct work;
> struct sk_buff *ooo_last_skb;
> struct rb_root out_of_order_queue;
> --
> 2.37.3
>
>
>
--
Mat Martineau
Intel
© 2016 - 2025 Red Hat, Inc.