:p
atchew
Login
[PATCH v8] includes "client-server" partial support for: 1. MPTCP cookie request from client (seems OK). 2. MPTCP cookie offering from server (seems OK). 3. MPTCP SYN+DATA+COOKIE from client (seems OK). 4. subsequent write + read on the opened socket (seems OK). ===Changes between v7 and v8 - We change from 0impact approach on existing TCP code to full reusage of existing and available functions. - Code is refactored (Max. reuse of existing linux kernel code). - fastopen.c is reduced to minima. - Other comments from mailing list are coming in the next version. ===Future work -Adress the appearance of "MPTCP FIN" as duplicated acks. -Integrate presented in the last patch selftests. Dmytro Shytyi (7): add mptcp_stream_connect to protocol.h add mptcp_setsockopt_fastopen reuse tcp_sendmsg_fastopen() mptfo variables for msk, options. Fix loop retrans Fix unxpctd val of subflow->map_seq(dscrd packet) add skb to mskq in tcp_fastopen_add_skb() selftests: mptfo initiator/listener include/net/tcp.h | 5 +- net/ipv4/tcp.c | 18 +++- net/ipv4/tcp_fastopen.c | 55 +++++++++-- net/ipv4/tcp_input.c | 11 ++- net/mptcp/Makefile | 2 +- net/mptcp/fastopen.c | 46 +++++++++ net/mptcp/options.c | 9 ++ net/mptcp/protocol.c | 19 ++-- net/mptcp/protocol.h | 14 ++- net/mptcp/sockopt.c | 3 + tools/testing/selftests/net/mptcp/mptfo.sh | 13 +++ .../selftests/net/mptcp/mptfo_initiator.c | 41 ++++++++ .../selftests/net/mptcp/mptfo_listener.c | 98 +++++++++++++++++++ 13 files changed, 311 insertions(+), 23 deletions(-) create mode 100644 net/mptcp/fastopen.c create mode 100644 tools/testing/selftests/net/mptcp/mptfo.sh create mode 100644 tools/testing/selftests/net/mptcp/mptfo_initiator.c create mode 100644 tools/testing/selftests/net/mptcp/mptfo_listener.c -- 2.25.1
In the following patches we will call mptcp_stream_connect() from function tcp_sendmsg_fastopen() in file "net/ipv4/tcp.c", thus make such symbol visible. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/protocol.c | 4 ++-- net/mptcp/protocol.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, __mptcp_do_fallback(msk); } -static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) +int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct mptcp_subflow_context *subflow; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); +int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { -- 2.25.1
Add set MPTFO socket option for MPTCP. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/Makefile | 2 +- net/mptcp/fastopen.c | 32 ++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 5 +++++ net/mptcp/sockopt.c | 3 +++ 4 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 net/mptcp/fastopen.c diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o pm_userspace.o sched.o + mib.o pm_netlink.o sockopt.o pm_userspace.o sched.o fastopen.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/net/mptcp/fastopen.c @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0 + * MPTCP Fast Open Mechanism. Copyright (c) 2021-2022, Dmytro SHYTYI + */ + +#include "protocol.h" + +int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen) +{ + struct sock *sk = (struct sock *)msk; + struct net *net = sock_net(sk); + int val; + int ret; + + ret = 0; + + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + + lock_sock(sk); + + if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { + tcp_fastopen_init_key_once(net); + fastopen_queue_tune(sk, val); + } else { + ret = -EINVAL; + } + + release_sock(sk); + + return ret; +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); +// Fast Open Mechanism functions begin +int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen); +// Fast Open Mechanism functions end + static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_NOTSENT_LOWAT: case TCP_TX_DELAY: case TCP_INQ: + case TCP_FASTOPEN: return true; } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); + case TCP_FASTOPEN: + return mptcp_setsockopt_sol_tcp_fastopen(msk, optval, optlen); } return -EOPNOTSUPP; -- 2.25.1
In the following patches we will reuse modified tcp_sendmsg_fastopen(). We call it from mptcp_sendmsg(). Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- include/net/tcp.h | 3 +++ net/ipv4/tcp.c | 18 +++++++++++++----- net/mptcp/protocol.c | 11 +++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -XXX,XX +XXX,XX @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, const struct dst_entry *dst); +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + int *copied, size_t size, + struct ubuf_info *uarg); void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -XXX,XX +XXX,XX @@ #include <asm/ioctls.h> #include <net/busy_poll.h> +#include <net/mptcp.h> +#include "../mptcp/protocol.h" + /* Track pending CMSGs. */ enum { TCP_CMSG_INQ = 1, @@ -XXX,XX +XXX,XX @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } } -static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, - int *copied, size_t size, - struct ubuf_info *uarg) +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + int *copied, size_t size, + struct ubuf_info *uarg) { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -XXX,XX +XXX,XX @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, } } flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; - err = __inet_stream_connect(sk->sk_socket, uaddr, - msg->msg_namelen, flags, 1); + if (!sk_is_mptcp(sk)) + err = __inet_stream_connect(sk->sk_socket, uaddr, + msg->msg_namelen, flags, 1); + else + err = mptcp_stream_connect(sk->sk_socket, uaddr, + msg->msg_namelen, msg->msg_flags); + /* fastopen_req could already be freed in __inet_stream_connect * if the connection times out or gets rst */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) long timeo; /* we don't support FASTOPEN yet */ - if (msg->msg_flags & MSG_FASTOPEN) - return -EOPNOTSUPP; + if (msg->msg_flags & MSG_FASTOPEN) { + struct socket *ssock = __mptcp_nmpc_socket(msk); + if (ssock) { + int copied_syn_fastopen = 0; + + ret = tcp_sendmsg_fastopen(ssock->sk, msg, &copied_syn_fastopen, len, NULL); + copied += copied_syn_fastopen; + } + } /* silently ignore everything else */ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; -- 2.25.1
Introduce mptfo variables for msk and options. Also fix the infinite retransmissions in the end of second session. The variable 2nd ack received in struct mptcp_options_received identifies the received ack on the listener side during 3way handshake in mptfo context and miningless alone if used alone. It is further used(checked) in conjunction in the same "if" statement with variable is_mptfo from struct mptcp_sock. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/fastopen.c | 14 ++++++++++++++ net/mptcp/options.c | 4 ++++ net/mptcp/protocol.h | 6 +++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -XXX,XX +XXX,XX @@ int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, return ret; } + +void mptcp_gen_msk_ackseq_fastopen(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + struct mptcp_options_received mp_opt) +{ + u64 ack_seq; + + msk->can_ack = true; + msk->remote_key = mp_opt.sndr_key; + mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); + ack_seq++; + WRITE_ONCE(msk->ack_seq, ack_seq); + pr_debug("ack_seq=%llu sndr_key=%llu", msk->ack_seq, mp_opt.sndr_key); + atomic64_set(&msk->rcv_wnd_sent, ack_seq); +} diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 8; } if (opsize >= TCPOLEN_MPTCP_MPC_ACK) { + mp_opt->hns_2nd_ack_rcvd = 1; mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } @@ -XXX,XX +XXX,XX @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return sk->sk_state != TCP_CLOSE; if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { + if (mp_opt.suboptions & OPTIONS_MPTCP_MPC && mp_opt.hns_2nd_ack_rcvd && msk->is_mptfo) + mptcp_gen_msk_ackseq_fastopen(msk, subflow, mp_opt); + if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && msk->local_key == mp_opt.rcvr_key) { WRITE_ONCE(msk->rcv_fastclose, true); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ struct mptcp_options_received { echo:1, backup:1, deny_join_id0:1, - __unused:2; + __unused:1; + u8 hns_2nd_ack_rcvd:1; u8 join_id; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; @@ -XXX,XX +XXX,XX @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + bool is_mptfo; u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, @@ -XXX,XX +XXX,XX @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_l // Fast Open Mechanism functions begin int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, unsigned int optlen); +void mptcp_gen_msk_ackseq_fastopen(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + struct mptcp_options_received mp_opt); // Fast Open Mechanism functions end static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) -- 2.25.1
Fix unexpected value of subflow->map_seq (discarded and after retransmitted 2nd packet(1st after TFO)). We use mptcp_gen_msk_ackseq_fasopen() when we know this is the first chunk of data after TFO. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/options.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mpext->dsn64 = 1; mpext->mpc_map = 1; mpext->data_fin = 0; + + if (msk->is_mptfo) { + mptcp_gen_msk_ackseq_fastopen(msk, subflow, mp_opt); + mpext->data_seq = READ_ONCE(msk->ack_seq); + } } else { mpext->data_seq = mp_opt.data_seq; mpext->subflow_seq = mp_opt.subflow_seq; -- 2.25.1
In the following patches we add skb to msk->receive_queue in the MPTCP fastopen context. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- include/net/tcp.h | 2 +- net/ipv4/tcp_fastopen.c | 55 +++++++++++++++++++++++++++++++++++------ net/ipv4/tcp_input.c | 11 +++++++-- net/mptcp/protocol.c | 4 +-- net/mptcp/protocol.h | 2 ++ 5 files changed, 62 insertions(+), 12 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -XXX,XX +XXX,XX @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key); int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, u64 *key); -void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); +void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb, struct request_sock *req); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -XXX,XX +XXX,XX @@ #include <linux/tcp.h> #include <linux/rcupdate.h> #include <net/tcp.h> +#include "../mptcp/protocol.h" void tcp_fastopen_init_key_once(struct net *net) { @@ -XXX,XX +XXX,XX @@ static void tcp_fastopen_cookie_gen(struct sock *sk, /* If an incoming SYN or SYNACK frame contains a payload and/or FIN, * queue this additional data / FIN. */ -void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) +void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct tcp_request_sock *tcp_r_sock = tcp_rsk(req); + struct sock *socket = mptcp_subflow_ctx(sk)->conn; + struct mptcp_sock *msk = mptcp_sk(socket); struct tcp_sock *tp = tcp_sk(sk); if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt) @@ -XXX,XX +XXX,XX @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + + if (req && tp->syn_fastopen && sk_is_mptcp(sk)) + tcp_r_sock = tcp_rsk(req); + else + goto add_skb_to_sk; + + msk->is_mptfo = 1; + + //Solves: WARNING: at 704 _mptcp_move_skbs_from_subflow+0x5d0/0x651 + tp->copied_seq += tp->rcv_nxt - tcp_r_sock->rcv_isn - 1; + + subflow->map_seq = mptcp_subflow_get_mapped_dsn(subflow); + + //Solves: BAD mapping: ssn=0 map_seq=1 map_data_len=3 + subflow->ssn_offset = tp->copied_seq - 1; + + skb_orphan(skb); + skb->sk = socket; + skb->destructor = mptcp_rfree; + atomic_add(skb->truesize, &socket->sk_rmem_alloc); + msk->rmem_fwd_alloc -= skb->truesize; + + __skb_queue_tail(&msk->receive_queue, skb); + atomic64_set(&msk->rcv_wnd_sent, mptcp_subflow_get_mapped_dsn(subflow)); + goto avoid_add_skb_to_sk; +add_skb_to_sk: __skb_queue_tail(&sk->sk_receive_queue, skb); +avoid_add_skb_to_sk: tp->syn_data_acked = 1; /* u64_stats_update_begin(&tp->syncp) not needed here, @@ -XXX,XX +XXX,XX @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_fastopen_add_skb(child, skb); + tcp_fastopen_add_skb(child, skb, req); tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; tp->rcv_wup = tp->rcv_nxt; @@ -XXX,XX +XXX,XX @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; + struct tcp_sock *tp = tcp_sk(sk); struct sock *child; int ret = 0; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); - if (!((tcp_fastopen & TFO_SERVER_ENABLE) && - (syn_data || foc->len >= 0) && - tcp_fastopen_queue_check(sk))) { - foc->len = -1; - return NULL; + if (tp->syn_fastopen && sk_is_mptcp(sk)) { + if (((syn_data || foc->len >= 0) && + tcp_fastopen_queue_check(sk))) { + foc->len = -1; + return NULL; + } + } else { + if (!((tcp_fastopen & TFO_SERVER_ENABLE) && + (syn_data || foc->len >= 0) && + tcp_fastopen_queue_check(sk))) { + foc->len = -1; + return NULL; + } } if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index XXXXXXX..XXXXXXX 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -XXX,XX +XXX,XX @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, --tp->delivered; } - tcp_fastopen_add_skb(sk, synack); + tcp_fastopen_add_skb(sk, synack, NULL); return false; } @@ -XXX,XX +XXX,XX @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (IS_ENABLED(CONFIG_SMC) && want_cookie) tmp_opt.smc_ok = 0; - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + if (foc.len == -1 && sk_is_mptcp(sk)) { + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + } else { + tmp_opt.tstamp_ok = 0; + tcp_rsk(req)->ts_off = 1; + tp->syn_fastopen = 1; + } + tcp_openreq_init(req, &tmp_opt, skb, sk); inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void __mptcp_rmem_reclaim(struct sock *sk, int amount) __sk_mem_reduce_allocated(sk, amount); } -static void mptcp_rmem_uncharge(struct sock *sk, int size) +void mptcp_rmem_uncharge(struct sock *sk, int size) { struct mptcp_sock *msk = mptcp_sk(sk); int reclaimable; @@ -XXX,XX +XXX,XX @@ static void mptcp_rmem_uncharge(struct sock *sk, int size) __mptcp_rmem_reclaim(sk, reclaimable); } -static void mptcp_rfree(struct sk_buff *skb) +void mptcp_rfree(struct sk_buff *skb) { unsigned int len = skb->truesize; struct sock *sk = skb->sk; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); +void mptcp_rmem_uncharge(struct sock *sk, int size); +void mptcp_rfree(struct sk_buff *skb); // Fast Open Mechanism functions begin int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval, -- 2.25.1
MPTFO tests: these are examples of initiator (sendto) and listener, probably are going to be integrated to the mptcp_connect.* selftests Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- tools/testing/selftests/net/mptcp/mptfo.sh | 13 +++ .../selftests/net/mptcp/mptfo_initiator.c | 41 ++++++++ .../selftests/net/mptcp/mptfo_listener.c | 98 +++++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 tools/testing/selftests/net/mptcp/mptfo.sh create mode 100644 tools/testing/selftests/net/mptcp/mptfo_initiator.c create mode 100644 tools/testing/selftests/net/mptcp/mptfo_listener.c diff --git a/tools/testing/selftests/net/mptcp/mptfo.sh b/tools/testing/selftests/net/mptcp/mptfo.sh new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptfo.sh @@ -XXX,XX +XXX,XX @@ +#!/bin/bash +#This is an example of environmen that was used to generate wireshark +sudo ip netns add server +sudo ip netns add client +sudo ip link add veth0 type veth peer name veth1 +sudo ip link set veth1 netns server +sudo ip link set veth0 netns client +sudo ip netns exec client ip a a 10.10.0.1/24 dev veth0 +sudo ip netns exec server ip a a 10.10.0.2/24 dev veth1 +sudo ip netns exec client ip link set dev veth0 up +sudo ip netns exec server ip link set dev veth1 up +sudo ip netns exec server bash -c "echo 2 > /proc/sys/net/ipv4/tcp_fastopen" +sudo ip netns exec client bash -c "echo 1 > /proc/sys/net/ipv4/tcp_fastopen" diff --git a/tools/testing/selftests/net/mptcp/mptfo_initiator.c b/tools/testing/selftests/net/mptcp/mptfo_initiator.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptfo_initiator.c @@ -XXX,XX +XXX,XX @@ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <unistd.h> +#include <netinet/tcp.h> +#include <string.h> +#include <signal.h> + +#define SERVER_PORT 7003 + +int main(int argc, char *argv[]) +{ + unsigned char valsyn[3] = "abc"; + struct sockaddr_in daddr; + char *valend = "fff"; + char *val1 = "zz1"; + char *val2 = "zz2"; + char *val3 = "zz3"; + int sock_fd = -1; + int ret; + + memset(&daddr, 0, sizeof(daddr)); + inet_pton(AF_INET, "10.10.0.2", &daddr.sin_addr); + daddr.sin_family = AF_INET; + daddr.sin_port = htons(SERVER_PORT); + + sock_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); + + ret = sendto(sock_fd, valsyn, 3, MSG_FASTOPEN, (struct sockaddr *) &daddr, sizeof(daddr)); + ret = write(sock_fd, val1, 3); + ret = write(sock_fd, val2, 3); + ret = write(sock_fd, val2, 3); + ret = write(sock_fd, val2, 3); + ret = write(sock_fd, val3, 3); + ret = write(sock_fd, valend, 3); + + close(sock_fd); + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/net/mptcp/mptfo_listener.c b/tools/testing/selftests/net/mptcp/mptfo_listener.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptfo_listener.c @@ -XXX,XX +XXX,XX @@ +#include <arpa/inet.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> +#include <netinet/in.h> +#include <linux/in.h> +#include <netinet/tcp.h> + +#define CLIENT_QUEUE_LEN 10 +#define SERVER_PORT 7003 + +int main(void) +{ + int listen_sock_fd = -1, client_sock_fd = -1; + char str_addr[INET6_ADDRSTRLEN]; + struct sockaddr_in server_addr; + int ret, flag; + int qlen = 5; + char ch; + + server_addr.sin_family = AF_INET; + inet_pton(AF_INET, "10.10.0.2", &server_addr.sin_addr); + server_addr.sin_port = htons(SERVER_PORT); + + /* Create socket for listening (client requests) */ + listen_sock_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); + if (listen_sock_fd == -1) { + perror("socket()server"); + return EXIT_FAILURE; + } + + /* Set socket to reuse address */ + flag = 1; + ret = setsockopt(listen_sock_fd, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(flag)); + if (ret == -1) { + perror("setsockopt()"); + return EXIT_FAILURE; + } + + ret = setsockopt(listen_sock_fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)); + if (ret == -1) { + perror("setsockopt()TCP_FASTOPEN"); + return EXIT_FAILURE; + } + + /* Bind address and socket together */ + ret = bind(listen_sock_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)); + if (ret == -1) { + perror("bind()"); + close(listen_sock_fd); + return EXIT_FAILURE; + } + + /* Create listening queue (client requests) */ + ret = listen(listen_sock_fd, CLIENT_QUEUE_LEN); + if (ret == -1) { + perror("listen()"); + close(listen_sock_fd); + return EXIT_FAILURE; + } + perror("Server listening"); + while (1) { + /* Do TCP handshake with client */ + client_sock_fd = accept(listen_sock_fd, + NULL, + 0); + if (client_sock_fd == -1) { + perror("accept()"); + close(listen_sock_fd); + return EXIT_FAILURE; + } else { + perror("ACCEPT_SUCCESS"); + } + + char rb[1024]; + + while (1) { + ret = read(client_sock_fd, rb, 3); + + if (ret == -1) { + perror("SERVVERread()"); + close(client_sock_fd); + break; + } else { + fprintf(stderr, "received %c%c%c from client", rb[0], rb[1], rb[2]); + } + if (rb[0] == 'f' && rb[1] == 'f' && rb[2] == 'f') { + close(client_sock_fd); + break; + } + + } + } + + return EXIT_SUCCESS; +} -- 2.25.1
These patches focus on the Initiator and partially on Listener side. The next options in userspace are available: a) sendto(..., ..., ..., MSG_FASTOPEN, ..., ...); b) setsockopt(..., SOL_TCP, TCP_FASTOPEN, ..., ...); These patches implement Appendix-B of RFC8684 (MPTFO). We would like to credit Paulo Abeni, Mat Martineau, Matthieu Baerts and Benjamin Hesmans for advices and ideas that improved these patches. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- v13 -> v14: - rebase on top of Paolo's patch: mptcp: factor out mptcp_connect(). - add IPv6 only check for subflow_v6_send_synack(). - new helper for sockopt (first subflow only) is used. - integrate selftests. - modify treq. --- Dmytro Shytyi (5): mptcp: introduce MSG_FASTOPEN flag. mptcp: implement delayed seq generation for passive fastopen mptcp: add subflow_v(4,6)_send_synack() mptcp: add TCP_FASTOPEN sock option selftests: mptcp: mptfo Initiator/Listener net/mptcp/Makefile | 2 +- net/mptcp/fastopen.c | 97 ++++++++++++ net/mptcp/options.c | 5 + net/mptcp/protocol.c | 11 +- net/mptcp/protocol.h | 9 ++ net/mptcp/sockopt.c | 5 +- net/mptcp/subflow.c | 42 ++++++ .../selftests/net/mptcp/mptcp_connect.c | 140 +++++++++++++----- .../selftests/net/mptcp/mptcp_connect.sh | 77 ++++++++++ 9 files changed, 346 insertions(+), 42 deletions(-) create mode 100644 net/mptcp/fastopen.c -- 2.34.1
In the following patches we will analyse the MSG_FASTOPEN flag in the mptcp_sendmsg() and invoke the MPTFO. Signed-of-by: Benjamin Hesmans <benjamin.hesmans@tessares.net> Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/protocol.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int ret = 0; long timeo; - /* we don't support FASTOPEN yet */ - if (msg->msg_flags & MSG_FASTOPEN) - return -EOPNOTSUPP; - /* silently ignore everything else */ - msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; + msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN; lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { + if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect || + msg->msg_flags & MSG_FASTOPEN))) { struct sock *ssk = ssock->sk; int copied_syn = 0; -- 2.34.1
With fastopen in place the first subflow socket is created before the MPC handshake completes, and we need to properly initialize the sequence numbers at MPC ACK reception. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/Makefile | 2 +- net/mptcp/fastopen.c | 19 +++++++++++++++++++ net/mptcp/options.c | 5 +++++ net/mptcp/protocol.h | 6 ++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 net/mptcp/fastopen.c diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o pm_userspace.o sched.o + mib.o pm_netlink.o sockopt.o pm_userspace.o sched.o fastopen.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/net/mptcp/fastopen.c @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0 + * MPTCP Fast Open Mechanism. Copyright (c) 2021-2022, Dmytro SHYTYI + */ + +#include "protocol.h" + +void mptcp_gen_msk_ackseq_fastopen(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + struct mptcp_options_received mp_opt) +{ + u64 ack_seq; + + WRITE_ONCE(msk->can_ack, true); + WRITE_ONCE(msk->remote_key, mp_opt.sndr_key); + mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); + ack_seq++; + WRITE_ONCE(msk->ack_seq, ack_seq); + pr_debug("ack_seq=%llu sndr_key=%llu", msk->ack_seq, mp_opt.sndr_key); + atomic64_set(&msk->rcv_wnd_sent, ack_seq); +} diff --git a/net/mptcp/options.c b/net/mptcp/options.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -XXX,XX +XXX,XX @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mpext->dsn64 = 1; mpext->mpc_map = 1; mpext->data_fin = 0; + + if (msk->is_mptfo) { + mptcp_gen_msk_ackseq_fastopen(msk, subflow, mp_opt); + mpext->data_seq = READ_ONCE(msk->ack_seq); + } } else { mpext->data_seq = mp_opt.data_seq; mpext->subflow_seq = mp_opt.subflow_seq; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + bool is_mptfo; u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, @@ -XXX,XX +XXX,XX @@ void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); +// Fast Open Mechanism functions begin +void mptcp_gen_msk_ackseq_fastopen(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + struct mptcp_options_received mp_opt); +// Fast Open Mechanism functions end + static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & -- 2.34.1
In this patch we add skb to the msk, dequeue it from sk, remove TSs and do skb mapping. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/fastopen.c | 78 ++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 3 ++ net/mptcp/subflow.c | 42 ++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 1 deletion(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -XXX,XX +XXX,XX @@ #include "protocol.h" +struct mptcp_skb_cb { + u64 map_seq; + u64 end_seq; + u32 offset; + u8 has_rxtstamp:1; +}; + +#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) + +void subflow_fastopen_send_synack_set_params(struct mptcp_subflow_context *subflow, + struct request_sock *req) +{ + struct tcp_request_sock *treq = tcp_rsk(req); + struct sock *ssk = subflow->tcp_sock; + struct sock *sk = subflow->conn; + struct mptcp_sock *msk; + struct sk_buff *skb; + struct tcp_sock *tp; + u32 offset; + + msk = mptcp_sk(sk); + tp = tcp_sk(ssk); + + /* mark subflow/msk as "mptfo" */ + msk->is_mptfo = 1; + + skb = skb_peek(&ssk->sk_receive_queue); + + /* dequeue the skb from sk receive queue */ + __skb_unlink(skb, &ssk->sk_receive_queue); + skb_ext_reset(skb); + skb_orphan(skb); + + /* set the skb mapping */ + tp->copied_seq += tp->rcv_nxt - treq->rcv_isn - 1; + subflow->map_seq = mptcp_subflow_get_mapped_dsn(subflow); + subflow->ssn_offset = tp->copied_seq - 1; + + /* innitialize MPTCP_CB */ + offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; + MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow); + MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + + (skb->len - offset); + MPTCP_SKB_CB(skb)->offset = offset; + MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + + mptcp_data_lock(sk); + + mptcp_set_owner_r(skb, sk); + __skb_queue_tail(&msk->receive_queue, skb); + + (sk)->sk_data_ready(sk); + + mptcp_data_unlock(sk); +} + +void __mptcp_pre_connect(struct mptcp_sock *msk, struct sock *ssk, + struct msghdr *msg, size_t size) +{ + struct tcp_sock *tp; + struct sk_buff *skb; + struct ubuf_info *uarg; + + lock_sock(ssk); + + tp = tcp_sk(ssk); + + skb = tcp_write_queue_tail(ssk); + uarg = msg_zerocopy_realloc(ssk, size, skb_zcopy(skb)); + tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), + ssk->sk_allocation); + tp->fastopen_req->data = msg; + tp->fastopen_req->size = size; + tp->fastopen_req->uarg = uarg; + + release_sock(ssk); +} + void mptcp_gen_msk_ackseq_fastopen(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, struct mptcp_options_received mp_opt) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -XXX,XX +XXX,XX @@ static void mptcp_rfree(struct sk_buff *skb) mptcp_rmem_uncharge(sk, len); } -static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) +void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) { skb_orphan(skb); skb->sk = sk; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -XXX,XX +XXX,XX @@ bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); // Fast Open Mechanism functions begin void mptcp_gen_msk_ackseq_fastopen(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, struct mptcp_options_received mp_opt); +void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); +void subflow_fastopen_send_synack_set_params(struct mptcp_subflow_context *subflow, + struct request_sock *req); // Fast Open Mechanism functions end static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -XXX,XX +XXX,XX @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, return NULL; } +static int subflow_v4_send_synack(const struct sock *sk, struct dst_entry *dst, + struct flowi *fl, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct inet_request_sock *ireq = inet_rsk(req); + + /* clear tstamp_ok, as needed depending on cookie */ + if (foc && foc->len > -1) + ireq->tstamp_ok = 0; + + if (synack_type == TCP_SYNACK_FASTOPEN) + subflow_fastopen_send_synack_set_params(subflow, req); + + return tcp_request_sock_ipv4_ops.send_synack(sk, dst, fl, req, foc, synack_type, syn_skb); +} + #if IS_ENABLED(CONFIG_MPTCP_IPV6) +static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst, + struct flowi *fl, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct inet_request_sock *ireq = inet_rsk(req); + + /* clear tstamp_ok, as needed depending on cookie */ + if (foc && foc->len > -1) + ireq->tstamp_ok = 0; + + if (synack_type == TCP_SYNACK_FASTOPEN) + subflow_fastopen_send_synack_set_params(subflow, req); + + return tcp_request_sock_ipv6_ops.send_synack(sk, dst, fl, req, foc, synack_type, syn_skb); +} + static struct dst_entry *subflow_v6_route_req(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, @@ -XXX,XX +XXX,XX @@ void __init mptcp_subflow_init(void) subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req; + subflow_request_sock_ipv4_ops.send_synack = subflow_v4_send_synack; subflow_specific = ipv4_specific; subflow_specific.conn_request = subflow_v4_conn_request; @@ -XXX,XX +XXX,XX @@ void __init mptcp_subflow_init(void) #if IS_ENABLED(CONFIG_MPTCP_IPV6) subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; + subflow_request_sock_ipv6_ops.send_synack = subflow_v6_send_synack; subflow_v6_specific = ipv6_specific; subflow_v6_specific.conn_request = subflow_v6_conn_request; -- 2.34.1
We add the TCP_FASTOPEN socket option in this patch that is going to be set by the listener side. Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- net/mptcp/sockopt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index XXXXXXX..XXXXXXX 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -XXX,XX +XXX,XX @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_NOTSENT_LOWAT: case TCP_TX_DELAY: case TCP_INQ: + case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_NO_COOKIE: return true; @@ -XXX,XX +XXX,XX @@ static bool mptcp_supported_sockopt(int level, int optname) /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, * TCP_REPAIR_WINDOW are not supported, better avoid this mess */ - /* TCP_FASTOPEN_KEY, TCP_FASTOPEN are not supported because + /* TCP_FASTOPEN_KEY is not supported because * fastopen for the listener side is currently unsupported */ } @@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); return 0; + case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_NO_COOKIE: return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, @@ -XXX,XX +XXX,XX @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_INFO: case TCP_CC_INFO: case TCP_DEFER_ACCEPT: + case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: case TCP_FASTOPEN_NO_COOKIE: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, -- 2.34.1
This patch adds the selftests support for mptfo in mptcp_connect.c,.sh We introduce mptfo option, that sets "TCP_FASTOPEN" + "MSG_FASTOPEN" We call sendto() instead of connect(). Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net> --- .../selftests/net/mptcp/mptcp_connect.c | 140 +++++++++++++----- .../selftests/net/mptcp/mptcp_connect.sh | 77 ++++++++++ 2 files changed, 184 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index XXXXXXX..XXXXXXX 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -XXX,XX +XXX,XX @@ struct cfg_cmsg_types { struct cfg_sockopt_types { unsigned int transparent:1; + unsigned int mptfo:1; }; struct tcp_inq_state { @@ -XXX,XX +XXX,XX @@ struct tcp_inq_state { bool expect_eof; }; +static size_t do_write(const int fd, char *buf, const size_t len); static struct tcp_inq_state tcp_inq; static struct cfg_cmsg_types cfg_cmsg_types; @@ -XXX,XX +XXX,XX @@ static void set_transparent(int fd, int pf) } } +static void set_mptfo(int fd, int pf) +{ + int qlen = 25; + + switch (pf) { + case AF_INET: + if (-1 == setsockopt(fd, 6, TCP_FASTOPEN, &qlen, sizeof(qlen))) + perror("TCP_FASTOPEN"); + break; + case AF_INET6: + if (-1 == setsockopt(fd, 6, TCP_FASTOPEN, &qlen, sizeof(qlen))) + perror("TCP_FASTOPEN"); + break; + } +} + static int do_ulp_so(int sock, const char *name) { return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); @@ -XXX,XX +XXX,XX @@ static int sock_listen_mptcp(const char * const listenaddr, if (cfg_sockopt_types.transparent) set_transparent(sock, pf); + if (cfg_sockopt_types.mptfo) + set_mptfo(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -XXX,XX +XXX,XX @@ static int sock_listen_mptcp(const char * const listenaddr, static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto, - struct addrinfo **peer) + struct addrinfo **peer, + int infd, + unsigned int *woff) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; + unsigned int wlen = 0; + int syn_copied = 0; + char wbuf[8192]; int sock = -1; hints.ai_family = pf; @@ -XXX,XX +XXX,XX @@ static int sock_connect_mptcp(const char * const remoteaddr, if (cfg_mark) set_mark(sock, cfg_mark); - if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { - *peer = a; - break; /* success */ - } + if (cfg_sockopt_types.mptfo) { + if (wlen == 0) + wlen = read(infd, wbuf, sizeof(wbuf)); - perror("connect()"); - close(sock); - sock = -1; + syn_copied = sendto(sock, wbuf, wlen, MSG_FASTOPEN, + a->ai_addr, a->ai_addrlen); + if (syn_copied) { + *woff = syn_copied; + *peer = a; + break; /* success */ + } + } else { + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { + *peer = a; + break; /* success */ + } + } + if (cfg_sockopt_types.mptfo) { + perror("sendto()"); + close(sock); + sock = -1; + } else { + + perror("connect()"); + close(sock); + sock = -1; + } } freeaddrinfo(addr); @@ -XXX,XX +XXX,XX @@ static void shut_wr(int fd) shutdown(fd, SHUT_WR); } -static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) +static int copyfd_io_poll(int infd, int peerfd, int outfd, + bool *in_closed_after_out, unsigned int woff) { struct pollfd fds = { .fd = peerfd, .events = POLLIN | POLLOUT, }; - unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0; + unsigned int wlen = 0, total_wlen = 0, total_rlen = 0; char wbuf[8192]; set_nonblock(peerfd, true); @@ -XXX,XX +XXX,XX @@ static int do_recvfile(int infd, int outfd) return (int)r; } -static int do_mmap(int infd, int outfd, unsigned int size) +static int do_mmap(int infd, int outfd, unsigned int size, unsigned int syn_off) { - char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); + char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, syn_off); ssize_t ret = 0, off = 0; size_t rem; @@ -XXX,XX +XXX,XX @@ static int do_sendfile(int infd, int outfd, unsigned int count) } static int copyfd_io_mmap(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, unsigned int woff) { int err; @@ -XXX,XX +XXX,XX @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, if (err) return err; - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, 0); } else { - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size - woff, woff); if (err) return err; @@ -XXX,XX +XXX,XX @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, return err; } -static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) +static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, unsigned int woff) { bool in_closed_after_out = false; struct timespec start, end; @@ -XXX,XX +XXX,XX @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) switch (cfg_mode) { case CFG_MODE_POLL: - ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out); + ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, woff); break; case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out, woff); break; case CFG_MODE_SENDFILE: @@ -XXX,XX +XXX,XX @@ int main_loop_s(int listensock) SOCK_TEST_TCPULP(remotesock, 0); - copyfd_io(fd, remotesock, 1, true); + copyfd_io(fd, remotesock, 1, true, 0); } else { perror("accept"); return 1; @@ -XXX,XX +XXX,XX @@ static void parse_setsock_options(const char *name) return; } + if (strncmp(name, "MPTFO", len) == 0) { + cfg_sockopt_types.mptfo = 1; + return; + } + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); exit(1); } @@ -XXX,XX +XXX,XX @@ int main_loop(void) { int fd, ret, fd_in = 0; struct addrinfo *peer; + unsigned int woff = 0; - /* listener is ready. */ - fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer); - if (fd < 0) - return 2; + if (!cfg_sockopt_types.mptfo) { + /* listener is ready. */ + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, NULL, NULL); + if (fd < 0) + return 2; + } again: - check_getpeername_connect(fd); + if (!cfg_sockopt_types.mptfo) { + check_getpeername_connect(fd); - SOCK_TEST_TCPULP(fd, cfg_sock_proto); - - if (cfg_rcvbuf) - set_rcvbuf(fd, cfg_rcvbuf); - if (cfg_sndbuf) - set_sndbuf(fd, cfg_sndbuf); - if (cfg_cmsg_types.cmsg_enabled) - apply_cmsg_types(fd, &cfg_cmsg_types); + SOCK_TEST_TCPULP(fd, cfg_sock_proto); + if (cfg_rcvbuf) + set_rcvbuf(fd, cfg_rcvbuf); + if (cfg_sndbuf) + set_sndbuf(fd, cfg_sndbuf); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); + } if (cfg_input) { fd_in = open(cfg_input, O_RDONLY); if (fd < 0) @@ -XXX,XX +XXX,XX @@ int main_loop(void) } /* close the client socket open only if we are not going to reconnect */ - ret = copyfd_io(fd_in, fd, 1, 0); + + if (cfg_sockopt_types.mptfo) { + /* sendto() instead of connect */ + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &woff); + if (fd < 0) + return 2; + } + if (cfg_sockopt_types.mptfo) { + check_getpeername_connect(fd); + + SOCK_TEST_TCPULP(fd, cfg_sock_proto); + + if (cfg_rcvbuf) + set_rcvbuf(fd, cfg_rcvbuf); + if (cfg_sndbuf) + set_sndbuf(fd, cfg_sndbuf); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); + } + ret = copyfd_io(fd_in, fd, 1, 0, woff); if (ret) return ret; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index XXXXXXX..XXXXXXX 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -XXX,XX +XXX,XX @@ run_tests() run_tests_lo $1 $2 $3 0 } +run_test_mptfo(){ + + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + local loopback="$4" + local extra_args="$5" + local msg="$6" + local lret=0 + + ip netns exec "$listener_ns" sysctl -q net.ipv4.tcp_fastopen=2 + ip netns exec "$connector_ns" sysctl -q net.ipv4.tcp_fastopen=1 + + # skip if test programs are running inside same netns for subsequent runs. + if [ $loopback -eq 0 ] && [ ${listener_ns} = ${connector_ns} ]; then + return 0 + fi + + # skip if we don't want v6 + if ! $ipv6 && is_v6 "${connect_addr}"; then + return 0 + fi + + local local_addr + if is_v6 "${connect_addr}"; then + local_addr="::" + else + local_addr="0.0.0.0" + fi + + + + TEST_COUNT=10000 + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" + lret=$? + + if [ $lret -ne 0 ]; then + echo "FAIL: $msg, mptcp connection error" 1>&2 + ret=$lret + return 1 + fi + + echo "PASS: $msg" + + + ip netns exec "$listener_ns" sysctl -q net.ipv4.tcp_fastopen=0 + ip netns exec "$connector_ns" sysctl -q net.ipv4.tcp_fastopen=0 + return 0 + +} + run_test_transparent() { local connect_addr="$1" @@ -XXX,XX +XXX,XX @@ run_test_transparent 10.0.3.1 "tproxy ipv4" run_test_transparent dead:beef:3::1 "tproxy ipv6" stop_if_error "Tests with tproxy have failed" +# MPTFO (MultiPath TCP Fatopen tests) + +for sender in $ns1 $ns2 $ns3 $ns4;do + run_test_mptfo "$ns1" $sender 10.0.1.1 0 "-o MPTFO" "mptfoIPv4" + run_test_mptfo "$ns1" $sender dead:beef:1::1 0 "-o MPTFO" "mptfoIPv6" + + run_test_mptfo "$ns2" $sender 10.0.1.2 0 "-o MPTFO" "mptfoIPv4" + run_test_mptfo "$ns2" $sender dead:beef:1::2 0 "-o MPTFO" "mptfoIPv6" + run_test_mptfo "$ns2" $sender 10.0.2.1 0 "-o MPTFO" "mptfoIPv4" + run_test_mptfo "$ns2" $sender dead:beef:2::1 0 "-o MPTFO" "mptfoIPv6" + + run_test_mptfo "$ns3" $sender 10.0.2.2 0 "-o MPTFO" "mptfoIPv4" + run_test_mptfo "$ns3" $sender dead:beef:2::2 0 "-o MPTFO" "mptfoIPv6" + run_test_mptfo "$ns3" $sender 10.0.3.2 0 "-o MPTFO" "mptfoIPv4" + run_test_mptfo "$ns3" $sender dead:beef:3::2 0 "-o MPTFO" "mptfoIPv6" + + run_test_mptfo "$ns4" $sender 10.0.3.1 0 "-o MPTFO" "mptfoIPv4" + run_test_mptfo "$ns4" $sender dead:beef:3::1 0 "-o MPTFO" "mptfoIPv6" + + stop_if_error "Tests with $sender as a sender have failed" +done +#run_test_mptfo 10.0.3.1 "mptfoIPv4" +#run_test_mptfo dead:beef:3::1 "mptfoIPv6" +#stop_if_error "Tests with MPTFO have failed" + run_tests_disconnect display_time -- 2.34.1