This set of patches brings "Fast Open" option support to MPTCP. The aim of the
Fast Open mechanism is to eliminate one round-trip time from a TCP conversation
by allowing data to be included in the SYN segment that initiates the
connection. See IETF RFC 8684, Appendix B ("TCP Fast Open and MPTCP").

[PATCH v1] includes partial "client" support for:
1. sending a cookie request;
2. sending SYN + data + cookie.

Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net>
---
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index XXXXXXX..XXXXXXX 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -XXX,XX +XXX,XX @@ static void mptcp_set_nospace(struct sock *sk)
 	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
 }
 
+static int mptcp_sendmsg_fastopen_cookie_req(struct sock *sk, struct msghdr *msg,
+					     size_t *copied, size_t size,
+					     struct ubuf_info *uarg)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *ssk = __mptcp_nmpc_socket(msk);
+	struct tcp_sock *tp = tcp_sk(ssk->sk);
+	struct sockaddr *uaddr = msg->msg_name;
+	struct tcp_fastopen_context *ctx;
+	const struct iphdr *iph;
+	struct sk_buff *skb;
+	int err;
+
+	skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true);
+	iph = ip_hdr(skb);
+	tcp_fastopen_init_key_once(sock_net(ssk->sk));
+	ctx = tcp_fastopen_get_ctx(ssk->sk);
+	tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
+				   ssk->sk->sk_allocation);
+	tp->fastopen_req->data = msg;
+	tp->fastopen_req->size = size;
+	tp->fastopen_req->uarg = uarg;
+	err = mptcp_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, msg->msg_flags);
+	return err;
+}
+
+static int mptcp_sendmsg_fastopen_cookie_send(struct sock *sk, struct msghdr *msg,
+					      size_t *copied, size_t size,
+					      struct ubuf_info *uarg)
+{
+	struct tcp_fastopen_cookie *fastopen_cookie = kmalloc(sizeof(*fastopen_cookie),
+							      GFP_KERNEL);
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *ssk = __mptcp_nmpc_socket(msk);
+	struct tcp_sock *tp = tcp_sk(ssk->sk);
+	struct sockaddr *uaddr = msg->msg_name;
+	struct tcp_fastopen_context *ctx;
+	const struct iphdr *iph;
+	struct sk_buff *skb;
+	int err;
+
+	skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true);
+	iph = ip_hdr(skb);
+	tcp_fastopen_init_key_once(sock_net(ssk->sk));
+	ctx = tcp_fastopen_get_ctx(ssk->sk);
+
+	fastopen_cookie->val[0] = cpu_to_le64(siphash(&iph->saddr,
+						      sizeof(iph->saddr) +
+						      sizeof(iph->daddr),
+						      &ctx->key[0]));
+	fastopen_cookie->len = TCP_FASTOPEN_COOKIE_SIZE;
+
+	tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
+				   ssk->sk->sk_allocation);
+	tp->fastopen_req->data = msg;
+	tp->fastopen_req->size = size;
+	tp->fastopen_req->uarg = uarg;
+	memcpy(&tp->fastopen_req->cookie, fastopen_cookie, sizeof(tp->fastopen_req->cookie));
+	err = mptcp_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, msg->msg_flags);
+	return err;
+}
+
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int ret = 0;
 	long timeo;
 
-	/* we don't support FASTOPEN yet */
-	if (msg->msg_flags & MSG_FASTOPEN)
-		return -EOPNOTSUPP;
+	/* we don't fully support FASTOPEN yet */
+
+	if (msg->msg_flags & MSG_FASTOPEN) {
+		struct socket *ssk = __mptcp_nmpc_socket(msk);
+		struct tcp_sock *tp = tcp_sk(ssk->sk);
+
+		if (tp && tp->fastopen_req && tp->fastopen_req->cookie.len != 0) {
+			/* send data together with the cookie */
+			ret = mptcp_sendmsg_fastopen_cookie_send(sk, msg, &copied, len, uarg);
+		} else {
+			/* request a cookie */
+			ret = mptcp_sendmsg_fastopen_cookie_req(sk, msg, &copied, len, uarg);
+		}
+		return ret;
+	}
 
 	/* silently ignore everything else */
 	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
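
For reviewers who want to exercise the v1 client path, below is a minimal
userspace sketch (not part of the patch) of the intended flow: the first
sendmsg()/sendto() with MSG_FASTOPEN should only obtain a cookie, and a later
one should carry data in the SYN. It needs a kernel with this series applied;
the server address 192.0.2.1:8080 is a placeholder, and IPPROTO_MPTCP is
defined locally in case the libc headers lack it. The MSG_FASTOPEN + sendto()
calling convention is the same one plain TCP Fast Open clients already use.

/* tfo_client.c: try to send data on the SYN of an MPTCP connection.
 * First run: the kernel requests a TFO cookie. Later runs: data may go
 * out in the SYN once a cookie is cached for the destination.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port   = htons(8080),	/* placeholder port */
	};
	const char msg[] = "hello over MPTCP fastopen\n";
	char reply[128];
	ssize_t n;
	int fd;

	inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr); /* placeholder address */

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* MSG_FASTOPEN implies the connect(); no separate connect() call */
	n = sendto(fd, msg, strlen(msg), MSG_FASTOPEN,
		   (struct sockaddr *)&addr, sizeof(addr));
	if (n < 0)
		perror("sendto(MSG_FASTOPEN)");

	n = read(fd, reply, sizeof(reply) - 1);
	if (n > 0) {
		reply[n] = '\0';
		printf("reply: %s", reply);
	}

	close(fd);
	return 0;
}

Running it twice against a Fast Open enabled listener and watching with tcpdump
should show the cookie exchange on the first connection and the payload inside
the SYN on the second.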
This set of patches brings "Fast Open" option support to MPTCP. The aim of the
Fast Open mechanism is to eliminate one round-trip time from a TCP conversation
by allowing data to be included in the SYN segment that initiates the
connection. See IETF RFC 8684, Appendix B ("TCP Fast Open and MPTCP").

[PATCH v2] includes partial "client-server" support for:
1. MPTCP cookie request from the client;
2. MPTCP cookie offering from the server;
3. MPTCP SYN + DATA + COOKIE from the client;
4. subsequent write + read on the opened socket.

This patch is a work in progress and an early draft shared due to a community
request.

Signed-off-by: Dmytro SHYTYI <dmytro@shytyi.net>
---
 include/linux/tcp.h             |  7 ++++
 net/ipv4/inet_connection_sock.c |  3 +-
 net/ipv4/tcp_fastopen.c         | 42 +++++++++++++++++++----
 net/ipv4/tcp_input.c            | 16 +++++----
 net/mptcp/protocol.c            | 59 ++++++++++++++++++++++++++++++---
 net/mptcp/sockopt.c             | 40 ++++++++++++++++++++++
 net/mptcp/subflow.c             | 14 ++++++++
 7 files changed, 162 insertions(+), 19 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index XXXXXXX..XXXXXXX 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -XXX,XX +XXX,XX @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 /* TCP Fast Open */
 #define TCP_FASTOPEN_COOKIE_MIN	4	/* Min Fast Open Cookie size in bytes */
 #define TCP_FASTOPEN_COOKIE_MAX	16	/* Max Fast Open Cookie size in bytes */
+
+#if IS_ENABLED(CONFIG_MPTCP)
+#define TCP_FASTOPEN_COOKIE_SIZE 4	/* the size employed by MPTCP impl. */
+#else
 #define TCP_FASTOPEN_COOKIE_SIZE 8	/* the size employed by this impl. */
+#endif
+
+
 /* TCP Fast Open Cookie as stored in memory */
 struct tcp_fastopen_cookie {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index XXXXXXX..XXXXXXX 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -XXX,XX +XXX,XX @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
 	req = reqsk_queue_remove(queue, sk);
 	newsk = req->sk;
 
-	if (sk->sk_protocol == IPPROTO_TCP &&
+	if ((sk->sk_protocol == IPPROTO_TCP ||
+	     sk->sk_protocol == IPPROTO_MPTCP) &&
 	    tcp_rsk(req)->tfo_listener) {
 		spin_lock_bh(&queue->fastopenq.lock);
 		if (tcp_rsk(req)->tfo_listener) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index XXXXXXX..XXXXXXX 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -XXX,XX +XXX,XX @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
 					     const siphash_key_t *key,
 					     struct tcp_fastopen_cookie *foc)
 {
+#if IS_ENABLED(CONFIG_MPTCP)
+	BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u32));
+#else
 	BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));
+#endif
 
 	if (req->rsk_ops->family == AF_INET) {
 		const struct iphdr *iph = ip_hdr(syn);
 
+#if IS_ENABLED(CONFIG_MPTCP)
+		foc->val[0] = cpu_to_le32(siphash(&iph->saddr,
+						  sizeof(iph->saddr) +
+						  sizeof(iph->daddr),
+						  key));
+#else
 		foc->val[0] = cpu_to_le64(siphash(&iph->saddr,
 						  sizeof(iph->saddr) +
 						  sizeof(iph->daddr),
 						  key));
+#endif
 		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
 		return true;
 	}
@@ -XXX,XX +XXX,XX @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
 
 /* Generate the fastopen cookie by applying SipHash to both the source and
  * destination addresses.
  */
+/*
 static void tcp_fastopen_cookie_gen(struct sock *sk,
 				    struct request_sock *req,
 				    struct sk_buff *syn,
@@ -XXX,XX +XXX,XX @@ static void tcp_fastopen_cookie_gen(struct sock *sk,
 		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc);
 	rcu_read_unlock();
 }
+*/
 
 /* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
  * queue this additional data / FIN.
@@ -XXX,XX +XXX,XX @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	 */
 	return child;
 }
-
+/*
 static bool tcp_fastopen_queue_check(struct sock *sk)
 {
 	struct fastopen_queue *fastopenq;
 
-	/* Make sure the listener has enabled fastopen, and we don't
+	 * Make sure the listener has enabled fastopen, and we don't
 	 * exceed the max # of pending TFO requests allowed before trying
 	 * to validating the cookie in order to avoid burning CPU cycles
 	 * unnecessarily.
@@ -XXX,XX +XXX,XX @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	 * processing a cookie request is that clients can't differentiate
 	 * between qlen overflow causing Fast Open to be disabled
 	 * temporarily vs a server not supporting Fast Open at all.
-	 */
+	 *
 	fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
 	if (fastopenq->max_qlen == 0)
 		return false;
@@ -XXX,XX +XXX,XX @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	}
 	return true;
 }
-
+*/
 static bool tcp_fastopen_no_cookie(const struct sock *sk,
 				   const struct dst_entry *dst,
 				   int flag)
@@ -XXX,XX +XXX,XX @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct tcp_fastopen_cookie *foc,
 			      const struct dst_entry *dst)
 {
+	/*
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
 	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+	*/
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sock *child;
 	int ret = 0;
 
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-
+/*
 	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
 	      (syn_data || foc->len >= 0) &&
 	      tcp_fastopen_queue_check(sk))) {
 		foc->len = -1;
 		return NULL;
 	}
-
+*/
 	if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
 		goto fastopen;
 
 	if (foc->len == 0) {
 		/* Client requests a cookie. */
-		tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+		/* tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc); */
+
+		struct tcp_fastopen_context *ctx;
+		struct iphdr *iph = ip_hdr(skb);
+
+		tcp_fastopen_init_key_once(sock_net(sk));
+		ctx = tcp_fastopen_get_ctx(sk);
+
+		valid_foc.val[0] = cpu_to_le32(siphash(&iph->saddr,
+						       sizeof(iph->saddr) +
+						       sizeof(iph->daddr),
+						       &ctx->key[0]));
+		valid_foc.len = TCP_FASTOPEN_COOKIE_SIZE;
+
 	} else if (foc->len > 0) {
 		ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
 						    &valid_foc);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index XXXXXXX..XXXXXXX 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -XXX,XX +XXX,XX @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 			} else {
 				tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
 			}
-
 			__tcp_ack_snd_check(sk, 0);
 no_ack:
 			if (eaten)
@@ -XXX,XX +XXX,XX @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		}
 		if (fastopen_fail)
 			return -1;
-		if (sk->sk_write_pending ||
-		    icsk->icsk_accept_queue.rskq_defer_accept ||
-		    inet_csk_in_pingpong_mode(sk)) {
+
+		if ((sk->sk_write_pending ||
+		     icsk->icsk_accept_queue.rskq_defer_accept ||
+		     inet_csk_in_pingpong_mode(sk)) && !th->syn) {
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
 			 *
@@ -XXX,XX +XXX,XX @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
-
 discard:
 			tcp_drop(sk, skb);
+			tcp_send_ack(sk);
+
 			return 0;
 		} else {
 			tcp_send_ack(sk);
@@ -XXX,XX +XXX,XX @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		tcp_urg(sk, skb, th);
 		__kfree_skb(skb);
 		tcp_data_snd_check(sk);
+
 		return 0;
 	}
@@ -XXX,XX +XXX,XX @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 			 */
 			pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
 				    rsk_ops->family);
-			goto drop_and_release;
+			/* goto drop_and_release; */
 		}
 
 		isn = af_ops->init_seq(skb);
@@ -XXX,XX +XXX,XX @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	reqsk_put(req);
 	return 0;
 
-drop_and_release:
+/* drop_and_release: */
 	dst_release(dst);
 drop_and_free:
 	__reqsk_free(req);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index XXXXXXX..XXXXXXX 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -XXX,XX +XXX,XX @@ static struct percpu_counter mptcp_sockets_allocated;
 
 static void __mptcp_destroy_sock(struct sock *sk);
 static void __mptcp_check_send_data_fin(struct sock *sk);
+static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+				int addr_len, int flags);
 
 DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 static struct net_device mptcp_napi_dev;
@@ -XXX,XX +XXX,XX @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 	}
 }
 
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+				  size_t len, struct mptcp_sock *msk, size_t copied)
+{
+	const struct iphdr *iph;
+	struct ubuf_info *uarg;
+	struct sockaddr *uaddr;
+	struct sk_buff *skb;
+	struct tcp_sock *tp;
+	struct socket *ssk;
+	int ret;
+
+	ssk = __mptcp_nmpc_socket(msk);
+	if (unlikely(!ssk))
+		goto out_EFAULT;
+	skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true);
+	if (unlikely(!skb))
+		goto out_EFAULT;
+	iph = ip_hdr(skb);
+	if (unlikely(!iph))
+		goto out_EFAULT;
+	uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb));
+	if (unlikely(!uarg))
+		goto out_EFAULT;
+	uaddr = msg->msg_name;
+
+	tp = tcp_sk(ssk->sk);
+	if (unlikely(!tp))
+		goto out_EFAULT;
+	if (!tp->fastopen_req)
+		tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
+					   ssk->sk->sk_allocation);
+
+	if (unlikely(!tp->fastopen_req))
+		goto out_EFAULT;
+	tp->fastopen_req->data = msg;
+	tp->fastopen_req->size = len;
+	tp->fastopen_req->uarg = uarg;
+
+	/* requests a cookie */
+	ret = mptcp_stream_connect(sk->sk_socket, uaddr,
+				   msg->msg_namelen, msg->msg_flags);
+
+	return ret;
+out_EFAULT:
+	ret = -EFAULT;
+	return ret;
+}
+
 static void mptcp_set_nospace(struct sock *sk)
 {
 	/* enable autotune */
@@ -XXX,XX +XXX,XX @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int ret = 0;
 	long timeo;
 
-	/* we don't support FASTOPEN yet */
+	/* we don't fully support FASTOPEN yet */
 	if (msg->msg_flags & MSG_FASTOPEN)
-		return -EOPNOTSUPP;
+		ret = mptcp_sendmsg_fastopen(sk, msg, len, msk, copied);
 
 	/* silently ignore everything else */
 	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
@@ -XXX,XX +XXX,XX @@ static void mptcp_worker(struct work_struct *work)
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 		__mptcp_close_subflow(msk);
-
+	/*
 	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
 		__mptcp_retrans(sk);
-
+	*/
 unlock:
 	release_sock(sk);
 	sock_put(sk);
@@ -XXX,XX +XXX,XX @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 	case TCP_SYN_SENT:
 		tcp_disconnect(ssk, O_NONBLOCK);
 		break;
+	case TCP_ESTABLISHED:
+		break;
 	default:
 		if (__mptcp_check_fallback(mptcp_sk(sk))) {
 			pr_debug("Fallback");
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index XXXXXXX..XXXXXXX 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -XXX,XX +XXX,XX @@ static bool mptcp_supported_sockopt(int level, int optname)
 	case TCP_TIMESTAMP:
 	case TCP_NOTSENT_LOWAT:
 	case TCP_TX_DELAY:
+	case TCP_FASTOPEN:
 		return true;
 	}
 
@@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
 	return ret;
 }
 
+static int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval,
+					     unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	struct net *net = sock_net(sk);
+	int val;
+	int ret;
+
+	ret = 0;
+
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
+		return -EFAULT;
+
+	lock_sock(sk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		lock_sock(ssk);
+
+		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+							TCPF_LISTEN))) {
+			tcp_fastopen_init_key_once(net);
+			fastopen_queue_tune(sk, val);
+		} else {
+			ret = -EINVAL;
+		}
+
+		release_sock(ssk);
+	}
+
+	release_sock(sk);
+
+	return ret;
+}
+
 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 				    sockptr_t optval, unsigned int optlen)
 {
@@ -XXX,XX +XXX,XX @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 		return -EOPNOTSUPP;
 	case TCP_CONGESTION:
 		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
+	case TCP_FASTOPEN:
+		return mptcp_setsockopt_sol_tcp_fastopen(msk, optval, optlen);
 	}
 
 	return -EOPNOTSUPP;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index XXXXXXX..XXXXXXX 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -XXX,XX +XXX,XX @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 	trace_get_mapping_status(mpext);
 
 	data_len = mpext->data_len;
+
 	if (data_len == 0) {
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
 		return MAPPING_INVALID;
@@ -XXX,XX +XXX,XX @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 	/* If this skb data are fully covered by the current mapping,
 	 * the new map would need caching, which is not supported
 	 */
+
 	if (skb_is_fully_mapped(ssk, skb)) {
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
 		return MAPPING_INVALID;
@@ -XXX,XX +XXX,XX @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 		subflow->map_data_csum = csum_unfold(mpext->csum);
 
 	/* Cfr RFC 8684 Section 3.3.0 */
+
 	if (unlikely(subflow->map_csum_reqd != csum_reqd))
 		return MAPPING_INVALID;
@@ -XXX,XX +XXX,XX @@ static bool subflow_check_data_avail(struct sock *ssk)
 	}
 
 	if (subflow->mp_join || subflow->fully_established) {
+		skb = skb_peek(&ssk->sk_receive_queue);
+		subflow->map_valid = 1;
+		subflow->map_seq = READ_ONCE(msk->ack_seq);
+		subflow->map_data_len = skb->len;
+		subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+
+		WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+		return true;
+
 		/* fatal protocol error, close the socket.
 		 * subflow_error_report() will introduce the appropriate barriers
 		 */
+		/*
 		ssk->sk_err = EBADMSG;
 		tcp_set_state(ssk, TCP_CLOSE);
 		subflow->reset_transient = 0;
@@ -XXX,XX +XXX,XX @@ static bool subflow_check_data_avail(struct sock *ssk)
 		tcp_send_active_reset(ssk, GFP_ATOMIC);
 		WRITE_ONCE(subflow->data_avail, 0);
 		return false;
+		*/
 	}
 
 	__mptcp_do_fallback(msk);
-- 
2.25.1
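
Likewise, a listener-side sketch (again not part of the patch) of what the
net/mptcp/sockopt.c hunk is meant to allow: setting TCP_FASTOPEN on an MPTCP
listening socket before listen(), so the server hands out cookies and accepts
data carried on the SYN. Port 8080 and the queue length of 16 are arbitrary
placeholders, and IPPROTO_MPTCP is defined locally in case the libc headers
lack it.

/* tfo_server.c: MPTCP listener with a TCP Fast Open backlog */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family      = AF_INET,
		.sin_port        = htons(8080),		/* placeholder port */
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int lfd, cfd, qlen = 16, one = 1;
	char buf[128];
	ssize_t n;

	lfd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (lfd < 0) {
		perror("socket");
		return 1;
	}
	setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	if (bind(lfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("bind");
		return 1;
	}

	/* enable Fast Open on the MPTCP listener (path added by this series) */
	if (setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
		perror("setsockopt(TCP_FASTOPEN)");

	listen(lfd, 16);

	cfd = accept(lfd, NULL, NULL);
	while ((n = read(cfd, buf, sizeof(buf))) > 0)
		write(cfd, buf, n);	/* echo back, including data from the SYN */

	close(cfd);
	close(lfd);
	return 0;
}

The setsockopt() must happen while the socket is still closed or listening,
matching the TCPF_CLOSE | TCPF_LISTEN check in
mptcp_setsockopt_sol_tcp_fastopen(); on an unpatched kernel it is expected to
fail with EOPNOTSUPP.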