:p
atchew
Login
This set of patches brings "Fast Open" option support to MPTCP. The aim of the Fast Open mechanism is to eliminate one round-trip time from a TCP conversation by allowing data to be included as part of the SYN segment that initiates the connection. See IETF RFC 8684, Appendix B: TCP Fast Open and MPTCP. [PATCH v3] includes partial "client-server" support for: 1. MPTCP cookie request from client. 2. MPTCP cookie offering from server. 3. MPTCP SYN+DATA+COOKIE from client. 4. subsequent write + read on the opened socket. This patch is a work-in-progress transitional draft. Code development was paused for a while and has recently resumed. The code is now based on top of the mptcp-next branch. The option handling below will be made smarter in the future, depending on the socket type (TCP or MPTCP): *tcp_options ^= OPTION_TS. You might also notice that some pieces of the upstream code are commented out. That is probably not good; it was done to observe the expected behavior of the MPTCP Fast Open mechanism. Any comments on how to achieve the same MPTCP_FO behavior without commenting out the related parts of the code are welcome.
Signed-off-by: Dmytro SHYTYI <dmytro@shytyi.net> --- include/net/mptcp.h | 2 +- net/ipv4/tcp_fastopen.c | 4 +++ net/ipv4/tcp_input.c | 7 ++--- net/ipv4/tcp_output.c | 3 +-- net/mptcp/options.c | 8 ++++-- net/mptcp/protocol.c | 59 ++++++++++++++++++++++++++++++++++++++--- net/mptcp/sockopt.c | 41 ++++++++++++++++++++++++++++ net/mptcp/subflow.c | 9 ++++--- 8 files changed, 118 insertions(+), 15 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -XXX,XX +XXX,XX @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space); bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, unsigned int *size, struct mptcp_out_options *opts); bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, - struct mptcp_out_options *opts); + struct mptcp_out_options *opts, u16 *tcp_options); bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -XXX,XX +XXX,XX @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_cookie *foc, const struct dst_entry *dst) { + /* bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + */ struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; int ret = 0; @@ -XXX,XX +XXX,XX @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); + /* if (!((tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { foc->len = -1; return NULL; } + */ if (tcp_fastopen_no_cookie(sk, dst, 
TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -XXX,XX +XXX,XX @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, } if (fastopen_fail) return -1; - if (sk->sk_write_pending || - icsk->icsk_accept_queue.rskq_defer_accept || - inet_csk_in_pingpong_mode(sk)) { + + if (!sk_is_mptcp(sk) && (sk->sk_write_pending || + icsk->icsk_accept_queue.rskq_defer_accept || + inet_csk_in_pingpong_mode(sk))) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. * diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -XXX,XX +XXX,XX @@ static void mptcp_set_option_cond(const struct request_sock *req, if (rsk_is_mptcp(req)) { unsigned int size; - if (mptcp_synack_options(req, &size, &opts->mptcp)) { + if (mptcp_synack_options(req, &size, &opts->mptcp, &opts->options)) { if (*remaining >= size) { opts->options |= OPTION_MPTCP; *remaining -= size; @@ -XXX,XX +XXX,XX @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, tp->syn_fastopen_exp = fastopen->cookie.exp ? 
1 : 0; } } - smc_set_option(tp, opts, &remaining); if (sk_is_mptcp(sk)) { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, } bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, - struct mptcp_out_options *opts) + struct mptcp_out_options *opts, u16 *tcp_options) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); +#define OPTION_TS BIT(1) + + + *tcp_options ^= OPTION_TS; if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; opts->csum_reqd = subflow_req->csum_reqd; opts->allow_join_id0 = subflow_req->allow_join_id0; - *size = TCPOLEN_MPTCP_MPC_SYNACK; + *size = TCPOLEN_MPTCP_MPC_SYNACK - TCPOLEN_TSTAMP_ALIGNED + TCPOLEN_SACKPERM_ALIGNED; pr_debug("subflow_req=%p, local_key=%llu", subflow_req, subflow_req->local_key); return true; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_sm static void __mptcp_destroy_sock(struct sock *sk); static void __mptcp_check_send_data_fin(struct sock *sk); +static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; @@ -XXX,XX +XXX,XX @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) } } +static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + size_t len, struct mptcp_sock *msk, size_t copied) +{ + const struct iphdr *iph; + struct ubuf_info *uarg; + struct sockaddr *uaddr; + struct sk_buff *skb; + struct tcp_sock *tp; + struct socket *ssk; + int ret; + + ssk = 
__mptcp_nmpc_socket(msk); + if (unlikely(!ssk)) + goto out_EFAULT; + skb = tcp_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); + if (unlikely(!skb)) + goto out_EFAULT; + iph = ip_hdr(skb); + if (unlikely(!iph)) + goto out_EFAULT; + uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb)); + if (unlikely(!uarg)) + goto out_EFAULT; + uaddr = msg->msg_name; + + tp = tcp_sk(ssk->sk); + if (unlikely(!tp)) + goto out_EFAULT; + if (!tp->fastopen_req) + tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), ssk->sk->sk_allocation); + + if (unlikely(!tp->fastopen_req)) + goto out_EFAULT; + tp->fastopen_req->data = msg; + tp->fastopen_req->size = len; + tp->fastopen_req->uarg = uarg; + + /* requests a cookie */ + ret = mptcp_stream_connect(sk->sk_socket, uaddr, + msg->msg_namelen, msg->msg_flags); + + return ret; +out_EFAULT: + ret = -EFAULT; + return ret; +} + static void mptcp_set_nospace(struct sock *sk) { /* enable autotune */ @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int ret = 0; long timeo; - /* we don't support FASTOPEN yet */ + /* we don't fully support FASTOPEN yet */ if (msg->msg_flags & MSG_FASTOPEN) - return -EOPNOTSUPP; + ret = mptcp_sendmsg_fastopen(sk, msg, len, msk, copied); /* silently ignore everything else */ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; @@ -XXX,XX +XXX,XX @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(msk); - +/* if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); - +*/ mptcp_mp_fail_no_response(msk); unlock: @@ -XXX,XX +XXX,XX @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) case TCP_SYN_SENT: tcp_disconnect(ssk, O_NONBLOCK); break; + case TCP_ESTABLISHED: + break; default: if (__mptcp_check_fallback(mptcp_sk(sk))) { pr_debug("Fallback"); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- 
a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_TX_DELAY: case TCP_INQ: return true; + case TCP_FASTOPEN: + return true; } /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optv return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen); } +static int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); + int val; + int ret; + + ret = 0; + + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + + lock_sock(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + lock_sock(ssk); + + if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | + TCPF_LISTEN))) { + tcp_fastopen_init_key_once(net); + fastopen_queue_tune(sk, val); + } else { + ret = -EINVAL; + } + + release_sock(ssk); + } + + release_sock(sk); + + return ret; +} + static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); + case TCP_FASTOPEN: + return mptcp_setsockopt_sol_tcp_fastopen(msk, optval, optlen); } return -EOPNOTSUPP; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ static enum mapping_status get_mapping_status(struct sock *ssk, sk_eat_skb(ssk, skb); return MAPPING_EMPTY; } - +/* if (!subflow->map_valid) return MAPPING_INVALID; - +*/ goto 
validate_seq; } trace_get_mapping_status(mpext); data_len = mpext->data_len; + if (data_len == 0) { pr_debug("infinite mapping received"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); @@ -XXX,XX +XXX,XX @@ static enum mapping_status get_mapping_status(struct sock *ssk, /* If this skb data are fully covered by the current mapping, * the new map would need caching, which is not supported */ + if (skb_is_fully_mapped(ssk, skb)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH); return MAPPING_INVALID; @@ -XXX,XX +XXX,XX @@ static enum mapping_status get_mapping_status(struct sock *ssk, /* we revalidate valid mapping on new skb, because we must ensure * the current skb is completely covered by the available mapping */ + /* if (!validate_mapping(ssk, skb)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH); return MAPPING_INVALID; } - + */ skb_ext_del(skb, SKB_EXT_MPTCP); validate_csum: -- 2.25.1
[PATCH v8] includes partial "client-server" support for: 1. MPTCP cookie request from client (seems OK). 2. MPTCP cookie offering from server (seems OK). 3. MPTCP SYN+DATA+COOKIE from client (seems OK). 4. subsequent write + read on the opened socket (seems OK). ===Changes between v7 and v8 - We changed from a zero-impact approach on the existing TCP code to full reuse of existing, available functions. - Code is refactored (max. reuse of existing Linux kernel code). - fastopen.c is reduced to a minimum. - Other comments from the mailing list will be addressed in the next version. ===Future work - Address the appearance of "MPTCP FIN" as duplicated ACKs. - Integrate the selftests presented in the last patch. Dmytro Shytyi (7): add mptcp_stream_connect to protocol.h add mptcp_setsockopt_fastopen reuse tcp_sendmsg_fastopen() mptfo variables for msk, options. Fix loop retrans Fix unxpctd val of subflow->map_seq(dscrd packet) add skb to mskq in tcp_fastopen_add_skb() selftests: mptfo initiator/listener include/net/tcp.h | 5 +- net/ipv4/tcp.c | 18 +++- net/ipv4/tcp_fastopen.c | 55 +++++++++-- net/ipv4/tcp_input.c | 11 ++- net/mptcp/Makefile | 2 +- net/mptcp/fastopen.c | 46 +++++++++ net/mptcp/options.c | 9 ++ net/mptcp/protocol.c | 19 ++-- net/mptcp/protocol.h | 14 ++- net/mptcp/sockopt.c | 3 + tools/testing/selftests/net/mptcp/mptfo.sh | 13 +++ .../selftests/net/mptcp/mptfo_initiator.c | 41 ++++++++ .../selftests/net/mptcp/mptfo_listener.c | 98 +++++++++++++++++++ 13 files changed, 311 insertions(+), 23 deletions(-) create mode 100644 net/mptcp/fastopen.c create mode 100644 tools/testing/selftests/net/mptcp/mptfo.sh create mode 100644 tools/testing/selftests/net/mptcp/mptfo_initiator.c create mode 100644 tools/testing/selftests/net/mptcp/mptfo_listener.c -- 2.25.1
In the following patches we will call mptcp_stream_connect() from tcp_sendmsg_fastopen() in "net/ipv4/tcp.c", so make this symbol visible outside protocol.c. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/protocol.c | 4 ++-- net/mptcp/protocol.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, __mptcp_do_fallback(msk); } -static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) +int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct mptcp_subflow_context *subflow; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); +int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { -- 2.25.1
Add set MPTFO socket option for MPTCP. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/Makefile | 2 +- net/mptcp/fastopen.c | 32 ++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 5 +++++ net/mptcp/sockopt.c | 3 +++ 4 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 net/mptcp/fastopen.c diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o pm_userspace.o sched.o + mib.o pm_netlink.o sockopt.o pm_userspace.o sched.o fastopen.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/net/mptcp/fastopen.c @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0 + * MPTCP Fast Open Mechanism. 
Copyright (c) 2021-2022, Dmytro SHYTYI + */ + +#include "protocol.h" + +int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen) +{ + struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); + int val; + int ret; + + ret = 0; + + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + + lock_sock(sk); + + if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { + tcp_fastopen_init_key_once(net); + fastopen_queue_tune(sk, val); + } else { + ret = -EINVAL; + } + + release_sock(sk); + + return ret; +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); +// Fast Open Mechanism functions begin +int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen); +// Fast Open Mechanism functions end + static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_NOTSENT_LOWAT: case TCP_TX_DELAY: case TCP_INQ: + case TCP_FASTOPEN: return true; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); + case TCP_FASTOPEN: + return mptcp_setsockopt_sol_tcp_fastopen(msk, optval, optlen); } return -EOPNOTSUPP; -- 2.25.1
In the following patches we will reuse modified tcp_sendmsg_fastopen(). We call it from mptcp_sendmsg(). Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- include/net/tcp.h | 3 +++ net/ipv4/tcp.c | 18 +++++++++++++----- net/mptcp/protocol.c | 11 +++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -XXX,XX +XXX,XX @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, const struct dst_entry *dst); +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + int *copied, size_t size, + struct ubuf_info *uarg); void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -XXX,XX +XXX,XX @@ #include <asm/ioctls.h> #include <net/busy_poll.h> +#include <net/mptcp.h> +#include "../mptcp/protocol.h" + /* Track pending CMSGs. */ enum { TCP_CMSG_INQ = 1, @@ -XXX,XX +XXX,XX @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } } -static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, - int *copied, size_t size, - struct ubuf_info *uarg) +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + int *copied, size_t size, + struct ubuf_info *uarg) { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -XXX,XX +XXX,XX @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, } } flags = (msg->msg_flags & MSG_DONTWAIT) ? 
O_NONBLOCK : 0; - err = __inet_stream_connect(sk->sk_socket, uaddr, - msg->msg_namelen, flags, 1); + if (!sk_is_mptcp(sk)) + err = __inet_stream_connect(sk->sk_socket, uaddr, + msg->msg_namelen, flags, 1); + else + err = mptcp_stream_connect(sk->sk_socket, uaddr, + msg->msg_namelen, msg->msg_flags); + /* fastopen_req could already be freed in __inet_stream_connect * if the connection times out or gets rst */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) long timeo; /* we don't support FASTOPEN yet */ - if (msg->msg_flags & MSG_FASTOPEN) - return -EOPNOTSUPP; + if (msg->msg_flags & MSG_FASTOPEN) { + struct socket *ssock = __mptcp_nmpc_socket(msk); + if (ssock) { + int copied_syn_fastopen = 0; + + ret = tcp_sendmsg_fastopen(ssock->sk, msg, &copied_syn_fastopen, len, NULL); + copied += copied_syn_fastopen; + } + } /* silently ignore everything else */ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; -- 2.25.1
Introduce MPTFO variables for msk and options. Also fix the infinite retransmissions at the end of the second session. The hns_2nd_ack_rcvd flag in struct mptcp_options_received marks the ACK received on the listener side during the 3-way handshake in the MPTFO context; it is meaningless if used alone. It is checked together with the is_mptfo variable from struct mptcp_sock in the same "if" statement. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/fastopen.c | 14 ++++++++++++++ net/mptcp/options.c | 4 ++++ net/mptcp/protocol.h | 6 +++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -XXX,XX +XXX,XX @@ int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, return ret; } + +void mptcp_gen_msk_ackseq_fastopen(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + struct mptcp_options_received mp_opt) +{ + u64 ack_seq; + + msk->can_ack = true; + msk->remote_key = mp_opt.sndr_key; + mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); + ack_seq++; + WRITE_ONCE(msk->ack_seq, ack_seq); + pr_debug("ack_seq=%llu sndr_key=%llu", msk->ack_seq, mp_opt.sndr_key); + atomic64_set(&msk->rcv_wnd_sent, ack_seq); +} diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 8; } if (opsize >= TCPOLEN_MPTCP_MPC_ACK) { + mp_opt->hns_2nd_ack_rcvd = 1; mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } @@ -XXX,XX +XXX,XX @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return sk->sk_state != TCP_CLOSE; if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { + if (mp_opt.suboptions & OPTIONS_MPTCP_MPC && mp_opt.hns_2nd_ack_rcvd && msk->is_mptfo) + mptcp_gen_msk_ackseq_fastopen(msk, subflow, 
mp_opt); + if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && msk->local_key == mp_opt.rcvr_key) { WRITE_ONCE(msk->rcv_fastclose, true); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ struct mptcp_options_received { echo:1, backup:1, deny_join_id0:1, - __unused:2; + __unused:1; + u8 hns_2nd_ack_rcvd:1; u8 join_id; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; @@ -XXX,XX +XXX,XX @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + bool is_mptfo; u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, @@ -XXX,XX +XXX,XX @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_l // Fast Open Mechanism functions begin int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, unsigned int optlen); +void mptcp_gen_msk_ackseq_fastopen(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + struct mptcp_options_received mp_opt); // Fast Open Mechanism functions end static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) -- 2.25.1
Fix the unexpected value of subflow->map_seq (the 2nd packet, i.e. the 1st after TFO, was discarded and later retransmitted). We call mptcp_gen_msk_ackseq_fastopen() when we know this is the first chunk of data after TFO. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/options.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mpext->dsn64 = 1; mpext->mpc_map = 1; mpext->data_fin = 0; + + if (msk->is_mptfo) { + mptcp_gen_msk_ackseq_fastopen(msk, subflow, mp_opt); + mpext->data_seq = READ_ONCE(msk->ack_seq); + } } else { mpext->data_seq = mp_opt.data_seq; mpext->subflow_seq = mp_opt.subflow_seq; -- 2.25.1
In the following patches we add skb to msk->receive_queue in the MPTCP fastopen context. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- include/net/tcp.h | 2 +- net/ipv4/tcp_fastopen.c | 55 +++++++++++++++++++++++++++++++++++------ net/ipv4/tcp_input.c | 11 +++++++-- net/mptcp/protocol.c | 4 +-- net/mptcp/protocol.h | 2 ++ 5 files changed, 62 insertions(+), 12 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -XXX,XX +XXX,XX @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key); int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, u64 *key); -void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); +void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb, struct request_sock *req); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -XXX,XX +XXX,XX @@ #include <linux/tcp.h> #include <linux/rcupdate.h> #include <net/tcp.h> +#include "../mptcp/protocol.h" void tcp_fastopen_init_key_once(struct net *net) { @@ -XXX,XX +XXX,XX @@ static void tcp_fastopen_cookie_gen(struct sock *sk, /* If an incoming SYN or SYNACK frame contains a payload and/or FIN, * queue this additional data / FIN. 
*/ -void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) +void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct tcp_request_sock *tcp_r_sock = tcp_rsk(req); + struct sock *socket = mptcp_subflow_ctx(sk)->conn; + struct mptcp_sock *msk = mptcp_sk(socket); struct tcp_sock *tp = tcp_sk(sk); if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt) @@ -XXX,XX +XXX,XX @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + + if (req && tp->syn_fastopen && sk_is_mptcp(sk)) + tcp_r_sock = tcp_rsk(req); + else + goto add_skb_to_sk; + + msk->is_mptfo = 1; + + //Solves: WARNING: at 704 _mptcp_move_skbs_from_subflow+0x5d0/0x651 + tp->copied_seq += tp->rcv_nxt - tcp_r_sock->rcv_isn - 1; + + subflow->map_seq = mptcp_subflow_get_mapped_dsn(subflow); + + //Solves: BAD mapping: ssn=0 map_seq=1 map_data_len=3 + subflow->ssn_offset = tp->copied_seq - 1; + + skb_orphan(skb); + skb->sk = socket; + skb->destructor = mptcp_rfree; + atomic_add(skb->truesize, &socket->sk_rmem_alloc); + msk->rmem_fwd_alloc -= skb->truesize; + + __skb_queue_tail(&msk->receive_queue, skb); + atomic64_set(&msk->rcv_wnd_sent, mptcp_subflow_get_mapped_dsn(subflow)); + goto avoid_add_skb_to_sk; +add_skb_to_sk: __skb_queue_tail(&sk->sk_receive_queue, skb); +avoid_add_skb_to_sk: tp->syn_data_acked = 1; /* u64_stats_update_begin(&tp->syncp) not needed here, @@ -XXX,XX +XXX,XX @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_fastopen_add_skb(child, skb); + tcp_fastopen_add_skb(child, skb, req); tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; tp->rcv_wup = tp->rcv_nxt; @@ -XXX,XX +XXX,XX @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; int tcp_fastopen = 
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + struct tcp_sock *tp = tcp_sk(sk); struct sock *child; int ret = 0; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); - if (!((tcp_fastopen & TFO_SERVER_ENABLE) && - (syn_data || foc->len >= 0) && - tcp_fastopen_queue_check(sk))) { - foc->len = -1; - return NULL; + if (tp->syn_fastopen && sk_is_mptcp(sk)) { + if (((syn_data || foc->len >= 0) && + tcp_fastopen_queue_check(sk))) { + foc->len = -1; + return NULL; + } + } else { + if (!((tcp_fastopen & TFO_SERVER_ENABLE) && + (syn_data || foc->len >= 0) && + tcp_fastopen_queue_check(sk))) { + foc->len = -1; + return NULL; + } } if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -XXX,XX +XXX,XX @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, --tp->delivered; } - tcp_fastopen_add_skb(sk, synack); + tcp_fastopen_add_skb(sk, synack, NULL); return false; } @@ -XXX,XX +XXX,XX @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (IS_ENABLED(CONFIG_SMC) && want_cookie) tmp_opt.smc_ok = 0; - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + if (foc.len == -1 && sk_is_mptcp(sk)) { + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + } else { + tmp_opt.tstamp_ok = 0; + tcp_rsk(req)->ts_off = 1; + tp->syn_fastopen = 1; + } + tcp_openreq_init(req, &tmp_opt, skb, sk); inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void __mptcp_rmem_reclaim(struct sock *sk, int amount) __sk_mem_reduce_allocated(sk, amount); } -static void mptcp_rmem_uncharge(struct sock *sk, int size) +void mptcp_rmem_uncharge(struct sock *sk, int 
size) { struct mptcp_sock *msk = mptcp_sk(sk); int reclaimable; @@ -XXX,XX +XXX,XX @@ static void mptcp_rmem_uncharge(struct sock *sk, int size) __mptcp_rmem_reclaim(sk, reclaimable); } -static void mptcp_rfree(struct sk_buff *skb) +void mptcp_rfree(struct sk_buff *skb) { unsigned int len = skb->truesize; struct sock *sk = skb->sk; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); +void mptcp_rmem_uncharge(struct sock *sk, int size); +void mptcp_rfree(struct sk_buff *skb); // Fast Open Mechanism functions begin int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, -- 2.25.1
MPTFO tests: these are examples of an initiator (using sendto()) and a listener; they will probably be integrated into the mptcp_connect.* selftests. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- tools/testing/selftests/net/mptcp/mptfo.sh | 13 +++ .../selftests/net/mptcp/mptfo_initiator.c | 41 ++++++++ .../selftests/net/mptcp/mptfo_listener.c | 98 +++++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 tools/testing/selftests/net/mptcp/mptfo.sh create mode 100644 tools/testing/selftests/net/mptcp/mptfo_initiator.c create mode 100644 tools/testing/selftests/net/mptcp/mptfo_listener.c diff --git a/tools/testing/selftests/net/mptcp/mptfo.sh b/tools/testing/selftests/net/mptcp/mptfo.sh new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptfo.sh @@ -XXX,XX +XXX,XX @@ +#!/bin/bash +#This is an example of environment that was used to generate wireshark +sudo ip netns add server +sudo ip netns add client +sudo ip link add veth0 type veth peer name veth1 +sudo ip link set veth1 netns server +sudo ip link set veth0 netns client +sudo ip netns exec client ip a a 10.10.0.1/24 dev veth0 +sudo ip netns exec server ip a a 10.10.0.2/24 dev veth1 +sudo ip netns exec client ip link set dev veth0 up +sudo ip netns exec server ip link set dev veth1 up +sudo ip netns exec server bash -c "echo 2 > /proc/sys/net/ipv4/tcp_fastopen" +sudo ip netns exec client bash -c "echo 1 > /proc/sys/net/ipv4/tcp_fastopen" diff --git a/tools/testing/selftests/net/mptcp/mptfo_initiator.c b/tools/testing/selftests/net/mptcp/mptfo_initiator.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptfo_initiator.c @@ -XXX,XX +XXX,XX @@ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <unistd.h> +#include <netinet/tcp.h> +#include <string.h> +#include <signal.h> + +#define SERVER_PORT 7003 + +int main(int argc, char *argv[]) +{ + 
unsigned char valsyn[3] = "abc"; + struct sockaddr_in daddr; + char *valend = "fff"; + char *val1 = "zz1"; + char *val2 = "zz2"; + char *val3 = "zz3"; + int sock_fd = -1; + int ret; + + memset(&daddr, 0, sizeof(daddr)); + inet_pton(AF_INET, "10.10.0.2", &daddr.sin_addr); + daddr.sin_family = AF_INET; + daddr.sin_port = htons(SERVER_PORT); + + sock_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); + + ret = sendto(sock_fd, valsyn, 3, MSG_FASTOPEN, (struct sockaddr *) &daddr, sizeof(daddr)); + ret = write(sock_fd, val1, 3); + ret = write(sock_fd, val2, 3); + ret = write(sock_fd, val2, 3); + ret = write(sock_fd, val2, 3); + ret = write(sock_fd, val3, 3); + ret = write(sock_fd, valend, 3); + + close(sock_fd); + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/net/mptcp/mptfo_listener.c b/tools/testing/selftests/net/mptcp/mptfo_listener.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptfo_listener.c @@ -XXX,XX +XXX,XX @@ +#include <arpa/inet.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> +#include <netinet/in.h> +#include <linux/in.h> +#include <netinet/tcp.h> + +#define CLIENT_QUEUE_LEN 10 +#define SERVER_PORT 7003 + +int main(void) +{ + int listen_sock_fd = -1, client_sock_fd = -1; + char str_addr[INET6_ADDRSTRLEN]; + struct sockaddr_in server_addr; + int ret, flag; + int qlen = 5; + char ch; + + server_addr.sin_family = AF_INET; + inet_pton(AF_INET, "10.10.0.2", &server_addr.sin_addr); + server_addr.sin_port = htons(SERVER_PORT); + + /* Create socket for listening (client requests) */ + listen_sock_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); + if (listen_sock_fd == -1) { + perror("socket()server"); + return EXIT_FAILURE; + } + + /* Set socket to reuse address */ + flag = 1; + ret = setsockopt(listen_sock_fd, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(flag)); + if (ret == -1) { + perror("setsockopt()"); + return EXIT_FAILURE; + } + + 
ret = setsockopt(listen_sock_fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)); + if (ret == -1) { + perror("setsockopt()TCP_FASTOPEN"); + return EXIT_FAILURE; + } + + /* Bind address and socket together */ + ret = bind(listen_sock_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)); + if (ret == -1) { + perror("bind()"); + close(listen_sock_fd); + return EXIT_FAILURE; + } + + /* Create listening queue (client requests) */ + ret = listen(listen_sock_fd, CLIENT_QUEUE_LEN); + if (ret == -1) { + perror("listen()"); + close(listen_sock_fd); + return EXIT_FAILURE; + } + perror("Server listening"); + while (1) { + /* Do TCP handshake with client */ + client_sock_fd = accept(listen_sock_fd, + NULL, + 0); + if (client_sock_fd == -1) { + perror("accept()"); + close(listen_sock_fd); + return EXIT_FAILURE; + } else { + perror("ACCEPT_SUCCESS"); + } + + char rb[1024]; + + while (1) { + ret = read(client_sock_fd, rb, 3); + + if (ret == -1) { + perror("SERVVERread()"); + close(client_sock_fd); + break; + } else { + fprintf(stderr, "received %c%c%c from client", rb[0], rb[1], rb[2]); + } + if (rb[0] == 'f' && rb[1] == 'f' && rb[2] == 'f') { + close(client_sock_fd); + break; + } + + } + } + + return EXIT_SUCCESS; +} -- 2.25.1