include/linux/tcp.h | 7 ++++ net/ipv4/inet_connection_sock.c | 3 +- net/ipv4/tcp_fastopen.c | 42 +++++++++++++++++++---- net/ipv4/tcp_input.c | 16 +++++---- net/mptcp/protocol.c | 59 ++++++++++++++++++++++++++++++--- net/mptcp/sockopt.c | 40 ++++++++++++++++++++++ net/mptcp/subflow.c | 14 ++++++++ 7 files changed, 162 insertions(+), 19 deletions(-)
This set of patches will bring "Fast Open" Option support to MPTCP.
The aim of Fast Open Mechanism is to eliminate one round trip
time from a TCP conversation by allowing data to be included as
part of the SYN segment that initiates the connection.
IETF RFC 8684: Appendix B. TCP Fast Open and MPTCP.
[PATCH v2] includes "client-server" partial support for :
1. MPTCP cookie request from client.
2. MPTCP cookie offering from server.
3. MPTCP SYN+DATA+COOKIE from client.
4. subsequent write + read on the opened socket.
This patch is Work In Progress and an early draft shared due community
request.
Signed-off-by: Dmytro SHYTYI <dmytro@shytyi.net>
---
include/linux/tcp.h | 7 ++++
net/ipv4/inet_connection_sock.c | 3 +-
net/ipv4/tcp_fastopen.c | 42 +++++++++++++++++++----
net/ipv4/tcp_input.c | 16 +++++----
net/mptcp/protocol.c | 59 ++++++++++++++++++++++++++++++---
net/mptcp/sockopt.c | 40 ++++++++++++++++++++++
net/mptcp/subflow.c | 14 ++++++++
7 files changed, 162 insertions(+), 19 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 48d8a363319e..d7092234d442 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -54,7 +54,14 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
/* TCP Fast Open */
#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
+
+#if IS_ENABLED(CONFIG_MPTCP)
+#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by MPTCP impl. */
+#else
#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */
+#endif
+
+
/* TCP Fast Open Cookie as stored in memory */
struct tcp_fastopen_cookie {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f7fea3a7c5e6..4b4159c0258d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -501,7 +501,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
req = reqsk_queue_remove(queue, sk);
newsk = req->sk;
- if (sk->sk_protocol == IPPROTO_TCP &&
+ if ((sk->sk_protocol == IPPROTO_TCP ||
+ sk->sk_protocol == IPPROTO_MPTCP) &&
tcp_rsk(req)->tfo_listener) {
spin_lock_bh(&queue->fastopenq.lock);
if (tcp_rsk(req)->tfo_listener) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdbcf2a6d08e..d26378983ed7 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -119,15 +119,26 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
const siphash_key_t *key,
struct tcp_fastopen_cookie *foc)
{
+#if IS_ENABLED(CONFIG_MPTCP)
+ BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u32));
+#else
BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));
+#endif
if (req->rsk_ops->family == AF_INET) {
const struct iphdr *iph = ip_hdr(syn);
+#if IS_ENABLED(CONFIG_MPTCP)
+ foc->val[0] = cpu_to_le32(siphash(&iph->saddr,
+ sizeof(iph->saddr) +
+ sizeof(iph->daddr),
+ key));
+#else
foc->val[0] = cpu_to_le64(siphash(&iph->saddr,
sizeof(iph->saddr) +
sizeof(iph->daddr),
key));
+#endif
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
return true;
}
@@ -149,6 +160,7 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
/* Generate the fastopen cookie by applying SipHash to both the source and
* destination addresses.
*/
+/*
static void tcp_fastopen_cookie_gen(struct sock *sk,
struct request_sock *req,
struct sk_buff *syn,
@@ -162,6 +174,7 @@ static void tcp_fastopen_cookie_gen(struct sock *sk,
__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc);
rcu_read_unlock();
}
+*/
/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
* queue this additional data / FIN.
@@ -291,12 +304,12 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
*/
return child;
}
-
+/*
static bool tcp_fastopen_queue_check(struct sock *sk)
{
struct fastopen_queue *fastopenq;
- /* Make sure the listener has enabled fastopen, and we don't
+ * Make sure the listener has enabled fastopen, and we don't
* exceed the max # of pending TFO requests allowed before trying
* to validating the cookie in order to avoid burning CPU cycles
* unnecessarily.
@@ -305,7 +318,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
* processing a cookie request is that clients can't differentiate
* between qlen overflow causing Fast Open to be disabled
* temporarily vs a server not supporting Fast Open at all.
- */
+ *
fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
if (fastopenq->max_qlen == 0)
return false;
@@ -327,7 +340,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
}
return true;
}
-
+*/
static bool tcp_fastopen_no_cookie(const struct sock *sk,
const struct dst_entry *dst,
int flag)
@@ -346,28 +359,43 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct tcp_fastopen_cookie *foc,
const struct dst_entry *dst)
{
+ /*
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ */
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
int ret = 0;
if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-
+/*
if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
(syn_data || foc->len >= 0) &&
tcp_fastopen_queue_check(sk))) {
foc->len = -1;
return NULL;
}
-
+*/
if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
if (foc->len == 0) {
/* Client requests a cookie. */
- tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+ //tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+
+ struct tcp_fastopen_context *ctx;
+ struct iphdr *iph = ip_hdr(skb);
+
+ tcp_fastopen_init_key_once(sock_net(sk));
+ ctx = tcp_fastopen_get_ctx(sk);
+
+ valid_foc.val[0] = cpu_to_le32(siphash(&iph->saddr,
+ sizeof(iph->saddr) +
+ sizeof(iph->daddr),
+ &ctx->key[0]));
+ valid_foc.len = TCP_FASTOPEN_COOKIE_SIZE;
+
} else if (foc->len > 0) {
ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
&valid_foc);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 246ab7b5e857..915570132014 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5908,7 +5908,6 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
} else {
tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
}
-
__tcp_ack_snd_check(sk, 0);
no_ack:
if (eaten)
@@ -6229,9 +6228,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
}
if (fastopen_fail)
return -1;
- if (sk->sk_write_pending ||
- icsk->icsk_accept_queue.rskq_defer_accept ||
- inet_csk_in_pingpong_mode(sk)) {
+
+ if ((sk->sk_write_pending ||
+ icsk->icsk_accept_queue.rskq_defer_accept ||
+ inet_csk_in_pingpong_mode(sk)) && !th->syn) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*
@@ -6243,9 +6243,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
-
discard:
tcp_drop(sk, skb);
+ tcp_send_ack(sk);
+
return 0;
} else {
tcp_send_ack(sk);
@@ -6425,6 +6426,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_urg(sk, skb, th);
__kfree_skb(skb);
tcp_data_snd_check(sk);
+
return 0;
}
@@ -6901,7 +6903,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
*/
pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
rsk_ops->family);
- goto drop_and_release;
+ //goto drop_and_release;
}
isn = af_ops->init_seq(skb);
@@ -6954,7 +6956,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
reqsk_put(req);
return 0;
-drop_and_release:
+//drop_and_release:
dst_release(dst);
drop_and_free:
__reqsk_free(req);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cd6b11c9b54d..3020a9c95a31 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -52,6 +52,8 @@ static struct percpu_counter mptcp_sockets_allocated;
static void __mptcp_destroy_sock(struct sock *sk);
static void __mptcp_check_send_data_fin(struct sock *sk);
+static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ int addr_len, int flags);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
@@ -1677,6 +1679,53 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
}
}
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+ size_t len, struct mptcp_sock *msk, size_t copied)
+{
+ const struct iphdr *iph;
+ struct ubuf_info *uarg;
+ struct sockaddr *uaddr;
+ struct sk_buff *skb;
+ struct tcp_sock *tp;
+ struct socket *ssk;
+ int ret;
+
+ ssk = __mptcp_nmpc_socket(msk);
+ if (unlikely(!ssk))
+ goto out_EFAULT;
+ skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true);
+ if (unlikely(!skb))
+ goto out_EFAULT;
+ iph = ip_hdr(skb);
+ if (unlikely(!iph))
+ goto out_EFAULT;
+ uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb));
+ if (unlikely(!uarg))
+ goto out_EFAULT;
+ uaddr = msg->msg_name;
+
+ tp = tcp_sk(ssk->sk);
+ if (unlikely(!tp))
+ goto out_EFAULT;
+ if (!tp->fastopen_req)
+ tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), ssk->sk->sk_allocation);
+
+ if (unlikely(!tp->fastopen_req))
+ goto out_EFAULT;
+ tp->fastopen_req->data = msg;
+ tp->fastopen_req->size = len;
+ tp->fastopen_req->uarg = uarg;
+
+ /* requests a cookie */
+ ret = mptcp_stream_connect(sk->sk_socket, uaddr,
+ msg->msg_namelen, msg->msg_flags);
+
+ return ret;
+out_EFAULT:
+ ret = -EFAULT;
+ return ret;
+}
+
static void mptcp_set_nospace(struct sock *sk)
{
/* enable autotune */
@@ -1694,9 +1743,9 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int ret = 0;
long timeo;
- /* we don't support FASTOPEN yet */
+ /* we don't fully support FASTOPEN yet */
if (msg->msg_flags & MSG_FASTOPEN)
- return -EOPNOTSUPP;
+ ret = mptcp_sendmsg_fastopen(sk, msg, len, msk, copied);
/* silently ignore everything else */
msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
@@ -2482,10 +2531,10 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);
-
+ /*
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
-
+ */
unlock:
release_sock(sk);
sock_put(sk);
@@ -2589,6 +2638,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
case TCP_SYN_SENT:
tcp_disconnect(ssk, O_NONBLOCK);
break;
+ case TCP_ESTABLISHED:
+ break;
default:
if (__mptcp_check_fallback(mptcp_sk(sk))) {
pr_debug("Fallback");
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 0f1e661c2032..0e471e31e72a 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -539,6 +539,7 @@ static bool mptcp_supported_sockopt(int level, int optname)
case TCP_TIMESTAMP:
case TCP_NOTSENT_LOWAT:
case TCP_TX_DELAY:
+ case TCP_FASTOPEN:
return true;
}
@@ -598,6 +599,43 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
return ret;
}
+static int mptcp_setsockopt_sol_tcp_fastopen(struct mptcp_sock *msk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ struct net *net = sock_net(sk);
+ int val;
+ int ret;
+
+ ret = 0;
+
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ lock_sock(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+
+ if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+ TCPF_LISTEN))) {
+ tcp_fastopen_init_key_once(net);
+ fastopen_queue_tune(sk, val);
+ } else {
+ ret = -EINVAL;
+ }
+
+ release_sock(ssk);
+ }
+
+ release_sock(sk);
+
+ return ret;
+}
+
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -606,6 +644,8 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
case TCP_CONGESTION:
return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
+ case TCP_FASTOPEN:
+ return mptcp_setsockopt_sol_tcp_fastopen(msk, optval, optlen);
}
return -EOPNOTSUPP;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6172f380dfb7..82976b31f2f2 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -966,6 +966,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
trace_get_mapping_status(mpext);
data_len = mpext->data_len;
+
if (data_len == 0) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
return MAPPING_INVALID;
@@ -1024,6 +1025,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
/* If this skb data are fully covered by the current mapping,
* the new map would need caching, which is not supported
*/
+
if (skb_is_fully_mapped(ssk, skb)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
return MAPPING_INVALID;
@@ -1044,6 +1046,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
subflow->map_data_csum = csum_unfold(mpext->csum);
/* Cfr RFC 8684 Section 3.3.0 */
+
if (unlikely(subflow->map_csum_reqd != csum_reqd))
return MAPPING_INVALID;
@@ -1180,9 +1183,19 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
if (subflow->mp_join || subflow->fully_established) {
+ skb = skb_peek(&ssk->sk_receive_queue);
+ subflow->map_valid = 1;
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ return true;
+
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
+ /*
ssk->sk_err = EBADMSG;
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
@@ -1190,6 +1203,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, 0);
return false;
+ */
}
__mptcp_do_fallback(msk);
--
2.25.1
Hi Dmytro, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on v5.16] [cannot apply to net-next/master linus/master next-20220116] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Dmytro-SHYTYI/net-mptcp-Fast-Open-Mechanism/20220116-081430 base: df0cc57e057f18e44dac8e6c18aba47ab53202f9 config: i386-randconfig-a012 (https://download.01.org/0day-ci/archive/20220116/202201162148.bSZpItDM-lkp@intel.com/config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/52c7bf82e2e91eb10c89ef6169fe02e0b63a6772 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Dmytro-SHYTYI/net-mptcp-Fast-Open-Mechanism/20220116-081430 git checkout 52c7bf82e2e91eb10c89ef6169fe02e0b63a6772 # save the config file to linux build tree mkdir build_dir make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash net/mptcp/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): net/mptcp/protocol.c: In function 'mptcp_sendmsg_fastopen': net/mptcp/protocol.c:1650:8: error: implicit declaration of function 'sk_stream_alloc_skb'; did you mean 'tcp_stream_alloc_skb'? [-Werror=implicit-function-declaration] 1650 | skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); | ^~~~~~~~~~~~~~~~~~~ | tcp_stream_alloc_skb >> net/mptcp/protocol.c:1650:6: warning: assignment to 'struct sk_buff *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 1650 | skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); | ^ cc1: some warnings being treated as errors vim +1650 net/mptcp/protocol.c 1635 1636 static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, 1637 size_t len, struct mptcp_sock *msk, size_t copied) 1638 { 1639 const struct iphdr *iph; 1640 struct ubuf_info *uarg; 1641 struct sockaddr *uaddr; 1642 struct sk_buff *skb; 1643 struct tcp_sock *tp; 1644 struct socket *ssk; 1645 int ret; 1646 1647 ssk = __mptcp_nmpc_socket(msk); 1648 if (unlikely(!ssk)) 1649 goto out_EFAULT; > 1650 skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); 1651 if (unlikely(!skb)) 1652 goto out_EFAULT; 1653 iph = ip_hdr(skb); 1654 if (unlikely(!iph)) 1655 goto out_EFAULT; 1656 uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb)); 1657 if (unlikely(!uarg)) 1658 goto out_EFAULT; 1659 uaddr = msg->msg_name; 1660 1661 tp = tcp_sk(ssk->sk); 1662 if (unlikely(!tp)) 1663 goto out_EFAULT; 1664 if (!tp->fastopen_req) 1665 tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), ssk->sk->sk_allocation); 1666 1667 if (unlikely(!tp->fastopen_req)) 1668 goto out_EFAULT; 1669 tp->fastopen_req->data = msg; 1670 tp->fastopen_req->size = len; 1671 tp->fastopen_req->uarg = uarg; 1672 1673 /* requests a cookie */ 1674 ret = mptcp_stream_connect(sk->sk_socket, uaddr, 1675 msg->msg_namelen, msg->msg_flags); 1676 1677 return ret; 1678 out_EFAULT: 1679 ret = -EFAULT; 1680 return ret; 1681 } 1682 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hello, I think i fixed this locally but I'm not going to submit a new patch with a correction to avoid CI resource wasting. I wait until major change. If you think it might be interesting to upload a new patch with this minor change, please let me know. sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); | ^~~~~~~~~~~~~~~~~~~ | tcp_stream_alloc_skb Also I'm curious as this patch is failed to apply to current master, but I don't know the reason (which conflict do I have?)... ...locally I successfully build this patch against: 5.16.0-rc8 met-next Auto-merging net/mptcp/subflow.cCONFLICT (content): Merge conflict in net/mptcp/subflow.c Auto-merging net/mptcp/sockopt.cCONFLICT (content): Merge conflict in net/mptcp/sockopt.c Thanks for your advice. Dmytro. On 16/01/2022 13:15, kernel test robot wrote: > Hi Dmytro, > > Thank you for the patch! Perhaps something to improve: > > [auto build test WARNING on v5.16] > [cannot apply to net-next/master linus/master next-20220116] > [If your patch is applied to the wrong git tree, kindly drop us a note. > And when submitting patch, we suggest to use '--base' as documented in > https://git-scm.com/docs/git-format-patch] > > url: https://github.com/0day-ci/linux/commits/Dmytro-SHYTYI/net-mptcp-Fast-Open-Mechanism/20220116-081430 > base: df0cc57e057f18e44dac8e6c18aba47ab53202f9 > config: i386-randconfig-a012 (https://download.01.org/0day-ci/archive/20220116/202201162148.bSZpItDM-lkp@intel.com/config) > compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 > reproduce (this is a W=1 build): > # https://github.com/0day-ci/linux/commit/52c7bf82e2e91eb10c89ef6169fe02e0b63a6772 > git remote add linux-review https://github.com/0day-ci/linux > git fetch --no-tags linux-review Dmytro-SHYTYI/net-mptcp-Fast-Open-Mechanism/20220116-081430 > git checkout 52c7bf82e2e91eb10c89ef6169fe02e0b63a6772 > # save the config file to linux build tree > mkdir build_dir > make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash net/mptcp/ > > If you fix the issue, kindly add following tag as appropriate > Reported-by: kernel test robot <lkp@intel.com> > > All warnings (new ones prefixed by >>): > > net/mptcp/protocol.c: In function 'mptcp_sendmsg_fastopen': > net/mptcp/protocol.c:1650:8: error: implicit declaration of function 'sk_stream_alloc_skb'; did you mean 'tcp_stream_alloc_skb'? [-Werror=implicit-function-declaration] > 1650 | skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); > | ^~~~~~~~~~~~~~~~~~~ > | tcp_stream_alloc_skb >>> net/mptcp/protocol.c:1650:6: warning: assignment to 'struct sk_buff *' from 'int' makes pointer from integer without a cast [-Wint-conversion] > 1650 | skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); > | ^ > cc1: some warnings being treated as errors > > > vim +1650 net/mptcp/protocol.c > > 1635 > 1636 static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, > 1637 size_t len, struct mptcp_sock *msk, size_t copied) > 1638 { > 1639 const struct iphdr *iph; > 1640 struct ubuf_info *uarg; > 1641 struct sockaddr *uaddr; > 1642 struct sk_buff *skb; > 1643 struct tcp_sock *tp; > 1644 struct socket *ssk; > 1645 int ret; > 1646 > 1647 ssk = __mptcp_nmpc_socket(msk); > 1648 if (unlikely(!ssk)) > 1649 goto out_EFAULT; >> 1650 skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); > 1651 if (unlikely(!skb)) > 1652 goto out_EFAULT; > 1653 iph = ip_hdr(skb); > 1654 if (unlikely(!iph)) > 1655 goto out_EFAULT; > 1656 uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb)); > 1657 if (unlikely(!uarg)) > 1658 goto out_EFAULT; > 1659 uaddr = msg->msg_name; > 1660 > 1661 tp = tcp_sk(ssk->sk); > 1662 if (unlikely(!tp)) > 1663 goto out_EFAULT; > 1664 if (!tp->fastopen_req) > 1665 tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), ssk->sk->sk_allocation); > 1666 > 1667 if (unlikely(!tp->fastopen_req)) > 1668 goto out_EFAULT; > 1669 tp->fastopen_req->data = msg; > 1670 tp->fastopen_req->size = len; > 1671 tp->fastopen_req->uarg = uarg; > 1672 > 1673 /* requests a cookie */ > 1674 ret = mptcp_stream_connect(sk->sk_socket, uaddr, > 1675 msg->msg_namelen, msg->msg_flags); > 1676 > 1677 return ret; > 1678 out_EFAULT: > 1679 ret = -EFAULT; > 1680 return ret; > 1681 } > 1682 > > --- > 0-DAY CI Kernel Test Service, Intel Corporation > https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org >
Hi Dmytro, Thank you for sharing this new version. On 16/01/2022 22:24, Dmytro SHYTYI wrote: > Hello, > > I think i fixed this locally but I'm not going to submit a new patch > with a correction to avoid CI resource wasting. I wait until major change. > If you think it might be interesting to upload a new patch with this > minor change, please let me know. Up to you. It is fine to submit a new version if you want more feedback from the CI. It can also help for the reviews if people can apply the patch on top of our "export" branch, please see below. Just if you do re-submit, please add "RFC" next to "PATCH" in the title. > sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); > | ^~~~~~~~~~~~~~~~~~~ > | tcp_stream_alloc_skb > > Also I'm curious as this patch is failed to apply to current master, but > I don't know the reason (which conflict do I have?)... ...locally I > successfully build this patch against: 5.16.0-rc8 met-next > Auto-merging net/mptcp/subflow.cCONFLICT (content): Merge conflict in > net/mptcp/subflow.c > Auto-merging net/mptcp/sockopt.cCONFLICT (content): Merge conflict in > net/mptcp/sockopt.c Your patch should be on top of our "export" branch -- or the 'for-review' one, same content but a continuous Git history -- from our repository on Github. https://github.com/multipath-tcp/mptcp_net-next/wiki/Git-Branches You can also see on Patchew it was not possible to apply your patch: https://patchew.org/MPTCP/20220116001259.203319-1-dmytro@shytyi.net/ Cheers, Matt -- Tessares | Belgium | Hybrid Access Solutions www.tessares.net
Hello Matthieu, Thanks for pointing me the Git branch that should be used. OK, i will add "RFC" next to "PATCH" in the title. Dmytro. On 17/01/2022 09:01, Matthieu Baerts wrote: > Hi Dmytro, > > Thank you for sharing this new version. > > On 16/01/2022 22:24, Dmytro SHYTYI wrote: >> Hello, >> >> I think i fixed this locally but I'm not going to submit a new patch >> with a correction to avoid CI resource wasting. I wait until major change. >> If you think it might be interesting to upload a new patch with this >> minor change, please let me know. > > Up to you. It is fine to submit a new version if you want more feedback > from the CI. It can also help for the reviews if people can apply the > patch on top of our "export" branch, please see below. > > Just if you do re-submit, please add "RFC" next to "PATCH" in the title. > > >> sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); >> | ^~~~~~~~~~~~~~~~~~~ >> | tcp_stream_alloc_skb >> >> Also I'm curious as this patch is failed to apply to current master, but >> I don't know the reason (which conflict do I have?)... ...locally I >> successfully build this patch against: 5.16.0-rc8 met-next >> Auto-merging net/mptcp/subflow.cCONFLICT (content): Merge conflict in >> net/mptcp/subflow.c >> Auto-merging net/mptcp/sockopt.cCONFLICT (content): Merge conflict in >> net/mptcp/sockopt.c > > Your patch should be on top of our "export" branch -- or the > 'for-review' one, same content but a continuous Git history -- from our > repository on Github. > > https://github.com/multipath-tcp/mptcp_net-next/wiki/Git-Branches > > You can also see on Patchew it was not possible to apply your patch: > > https://patchew.org/MPTCP/20220116001259.203319-1-dmytro@shytyi.net/ > > Cheers, > Matt >
Hi Dmytro, Thank you for the patch! Yet something to improve: [auto build test ERROR on v5.16] [cannot apply to net-next/master linus/master next-20220116] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Dmytro-SHYTYI/net-mptcp-Fast-Open-Mechanism/20220116-081430 base: df0cc57e057f18e44dac8e6c18aba47ab53202f9 config: i386-randconfig-a012 (https://download.01.org/0day-ci/archive/20220116/202201162159.uZep2msW-lkp@intel.com/config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/52c7bf82e2e91eb10c89ef6169fe02e0b63a6772 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Dmytro-SHYTYI/net-mptcp-Fast-Open-Mechanism/20220116-081430 git checkout 52c7bf82e2e91eb10c89ef6169fe02e0b63a6772 # save the config file to linux build tree mkdir build_dir make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): net/mptcp/protocol.c: In function 'mptcp_sendmsg_fastopen': >> net/mptcp/protocol.c:1650:8: error: implicit declaration of function 'sk_stream_alloc_skb'; did you mean 'tcp_stream_alloc_skb'? [-Werror=implicit-function-declaration] 1650 | skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); | ^~~~~~~~~~~~~~~~~~~ | tcp_stream_alloc_skb net/mptcp/protocol.c:1650:6: warning: assignment to 'struct sk_buff *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 1650 | skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); | ^ cc1: some warnings being treated as errors vim +1650 net/mptcp/protocol.c 1635 1636 static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, 1637 size_t len, struct mptcp_sock *msk, size_t copied) 1638 { 1639 const struct iphdr *iph; 1640 struct ubuf_info *uarg; 1641 struct sockaddr *uaddr; 1642 struct sk_buff *skb; 1643 struct tcp_sock *tp; 1644 struct socket *ssk; 1645 int ret; 1646 1647 ssk = __mptcp_nmpc_socket(msk); 1648 if (unlikely(!ssk)) 1649 goto out_EFAULT; > 1650 skb = sk_stream_alloc_skb(ssk->sk, 0, ssk->sk->sk_allocation, true); 1651 if (unlikely(!skb)) 1652 goto out_EFAULT; 1653 iph = ip_hdr(skb); 1654 if (unlikely(!iph)) 1655 goto out_EFAULT; 1656 uarg = msg_zerocopy_realloc(sk, len, skb_zcopy(skb)); 1657 if (unlikely(!uarg)) 1658 goto out_EFAULT; 1659 uaddr = msg->msg_name; 1660 1661 tp = tcp_sk(ssk->sk); 1662 if (unlikely(!tp)) 1663 goto out_EFAULT; 1664 if (!tp->fastopen_req) 1665 tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), ssk->sk->sk_allocation); 1666 1667 if (unlikely(!tp->fastopen_req)) 1668 goto out_EFAULT; 1669 tp->fastopen_req->data = msg; 1670 tp->fastopen_req->size = len; 1671 tp->fastopen_req->uarg = uarg; 1672 1673 /* requests a cookie */ 1674 ret = mptcp_stream_connect(sk->sk_socket, uaddr, 1675 msg->msg_namelen, msg->msg_flags); 1676 1677 return ret; 1678 out_EFAULT: 1679 ret = -EFAULT; 1680 return ret; 1681 } 1682 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hello, On Sun, 2022-01-16 at 00:12 +0000, Dmytro SHYTYI wrote: > This set of patches will bring "Fast Open" Option support to MPTCP. > The aim of Fast Open Mechanism is to eliminate one round trip > time from a TCP conversation by allowing data to be included as > part of the SYN segment that initiates the connection. > > IETF RFC 8684: Appendix B. TCP Fast Open and MPTCP. > > [PATCH v2] includes "client-server" partial support for : > 1. MPTCP cookie request from client. > 2. MPTCP cookie offering from server. > 3. MPTCP SYN+DATA+COOKIE from client. > 4. subsequent write + read on the opened socket. > > This patch is Work In Progress and an early draft shared due community > request. It looks like this is more far from an RFC status than I thought. There are a number of tasks to be completed for that: - do not comment out existing working code, expecially TCP one. - split the patch in several small one, clearly documenting in the individual commit message what each patch is doing and why. A possible skeleton could be: * create the needed hooks in the TCP code, providing a dummy implementation. Each hook will need a solid justification, and they should not affect the TCP critical path. The number of added hooks should be as low as possible. You should try to use the existing hooking whenever it should be possible. No patch should touch the TCP code after the hooks are created. Side note: I'm unsure if any additional hooks are really needed. * add the real MPTCP impl, possibly with separate patches for the client and the server side * add self-tests - avoid introducing white-space only changes. There are a bunch of specific inline notes below, which are still very important. > > Signed-off-by: Dmytro SHYTYI <dmytro@shytyi.net> > --- > include/linux/tcp.h | 7 ++++ > net/ipv4/inet_connection_sock.c | 3 +- > net/ipv4/tcp_fastopen.c | 42 +++++++++++++++++++---- > net/ipv4/tcp_input.c | 16 +++++---- > net/mptcp/protocol.c | 59 ++++++++++++++++++++++++++++++--- > net/mptcp/sockopt.c | 40 ++++++++++++++++++++++ > net/mptcp/subflow.c | 14 ++++++++ > 7 files changed, 162 insertions(+), 19 deletions(-) > > diff --git a/include/linux/tcp.h b/include/linux/tcp.h > index 48d8a363319e..d7092234d442 100644 > --- a/include/linux/tcp.h > +++ b/include/linux/tcp.h > @@ -54,7 +54,14 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) > /* TCP Fast Open */ > #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ > #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ > + > +#if IS_ENABLED(CONFIG_MPTCP) > +#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by MPTCP impl. */ > +#else > #define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */ > +#endif You really need to document this kind of changes in the code, possibly referring to the RFC bits requiring it. > + > + > > /* TCP Fast Open Cookie as stored in memory */ > struct tcp_fastopen_cookie { > diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c > index f7fea3a7c5e6..4b4159c0258d 100644 > --- a/net/ipv4/inet_connection_sock.c > +++ b/net/ipv4/inet_connection_sock.c > @@ -501,7 +501,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) > req = reqsk_queue_remove(queue, sk); > newsk = req->sk; > > - if (sk->sk_protocol == IPPROTO_TCP && > + if ((sk->sk_protocol == IPPROTO_TCP || > + sk->sk_protocol == IPPROTO_MPTCP) && > tcp_rsk(req)->tfo_listener) { > spin_lock_bh(&queue->fastopenq.lock); > if (tcp_rsk(req)->tfo_listener) { This chunk is really not needed. mptcp invokes inet_csk_accept(), passing the initial subflow as the first argument. That socket is really a TCP one. Cheers, Paolo
On Mon, 2022-01-17 at 10:58 +0100, Paolo Abeni wrote: > > diff --git a/include/linux/tcp.h b/include/linux/tcp.h > > index 48d8a363319e..d7092234d442 100644 > > --- a/include/linux/tcp.h > > +++ b/include/linux/tcp.h > > @@ -54,7 +54,14 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) > > /* TCP Fast Open */ > > #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ > > #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ > > + > > +#if IS_ENABLED(CONFIG_MPTCP) > > +#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by MPTCP impl. */ > > +#else > > #define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */ > > +#endif > > You really need to document this kind of changes in the code, possibly > referring to the RFC bits requiring it. Note that MPC + syn currently uses 24 bytes of TCP option spaces, and MPC + syn + ack is using 32 bytes, to there is enough room for 8 bytes cookies. /P
Hi Dmytro, On 17/01/2022 11:03, Paolo Abeni wrote: > On Mon, 2022-01-17 at 10:58 +0100, Paolo Abeni wrote: >>> diff --git a/include/linux/tcp.h b/include/linux/tcp.h >>> index 48d8a363319e..d7092234d442 100644 >>> --- a/include/linux/tcp.h >>> +++ b/include/linux/tcp.h >>> @@ -54,7 +54,14 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) >>> /* TCP Fast Open */ >>> #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ >>> #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ >>> + >>> +#if IS_ENABLED(CONFIG_MPTCP) >>> +#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by MPTCP impl. */ >>> +#else >>> #define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */ >>> +#endif >> >> You really need to document this kind of changes in the code, possibly >> referring to the RFC bits requiring it. > > Note that MPC + syn currently uses 24 bytes of TCP option spaces, and > MPC + syn + ack is using 32 bytes, to there is enough room for 8 bytes > cookies. Indeed, in MPTCPv1 (RFC8684), there is more space in the MPC+SYN as mentioned in the Appendix B you referred to. Also, you cannot change the value here depending on the kernel configuration: that will break TFO for "plain" TCP I suppose. Adding this feature is quite complex because our primary goal is not to have any impact on TCP connections without MPTCP. Any modification in TCP code needs to be very well justified to be accepted upstream. Do not hesitate to join one of our weekly meeting and/or IRC channel [1] to discuss about the implementation of this feature. Because of the task's complexity, it is probably better to discuss than having long and probably discouraging feedback loops by email. WDYT? [1] https://github.com/multipath-tcp/mptcp_net-next/wiki#resources Cheers, Matt -- Tessares | Belgium | Hybrid Access Solutions www.tessares.net
Hello Matthieu, On 17/01/2022 10:22, Matthieu Baerts wrote: > Hi Dmytro, > > On 17/01/2022 11:03, Paolo Abeni wrote: >> On Mon, 2022-01-17 at 10:58 +0100, Paolo Abeni wrote: >>>> diff --git a/include/linux/tcp.h b/include/linux/tcp.h >>>> index 48d8a363319e..d7092234d442 100644 >>>> --- a/include/linux/tcp.h >>>> +++ b/include/linux/tcp.h >>>> @@ -54,7 +54,14 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) >>>> /* TCP Fast Open */ >>>> #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ >>>> #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ >>>> + >>>> +#if IS_ENABLED(CONFIG_MPTCP) >>>> +#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by MPTCP impl. */ >>>> +#else >>>> #define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */ >>>> +#endif >>> >>> You really need to document this kind of changes in the code, possibly >>> referring to the RFC bits requiring it. >> >> Note that MPC + syn currently uses 24 bytes of TCP option spaces, and >> MPC + syn + ack is using 32 bytes, to there is enough room for 8 bytes >> cookies. > > Indeed, in MPTCPv1 (RFC8684), there is more space in the MPC+SYN as > mentioned in the Appendix B you referred to. > > Also, you cannot change the value here depending on the kernel > configuration: that will break TFO for "plain" TCP I suppose. I agree. I might find other way to enter this particular case if needed at all. > Adding this feature is quite complex because our primary goal is not to > have any impact on TCP connections without MPTCP. Any modification in > TCP code needs to be very well justified to be accepted upstream. Indeed. :) > Do not hesitate to join one of our weekly meeting and/or IRC channel [1] > to discuss about the implementation of this feature. Because of the > task's complexity, it is probably better to discuss than having long and > probably discouraging feedback loops by email. WDYT? > > [1] https://github.com/multipath-tcp/mptcp_net-next/wiki#resources I will try to participate. Thank you for invitation! > Cheers, > Matt > Best regards, Dmytro.
Hi Paolo, AFAIK, TFO option requires more that 8 bytes, if cookie len is 8 bytes: In file /net/ipv4/tcp_output.c (line 689): static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, struct tcp_out_options *opts) .... { if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; u8 *p = (u8 *)ptr; u32 len; /* Fast Open option length */ if (foc->exp) { len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) | TCPOPT_FASTOPEN_MAGIC); p += TCPOLEN_EXP_FASTOPEN_BASE; } else { len = TCPOLEN_FASTOPEN_BASE + foc->len; *p++ = TCPOPT_FASTOPEN; *p++ = len; } memcpy(p, foc->val, foc->len); if ((len & 3) == 2) { p[foc->len] = TCPOPT_NOP; p[foc->len + 1] = TCPOPT_NOP; } ptr += (len + 3) >> 2; } ... Here we do have "len = TCPOLEN_FASTOPEN_BASE + foc->len" that is slightly more than 8 bytes. This is the cause of introducing 4 byte cookie for MPTCP implementation. The RFC 8684 B.1. says: This is done with the TCP cookie request/response options, of 2 bytes and 6-18 bytes, respectively (depending on the chosen cookie length). So in my understanding, the "selected cookie size + TCPOLEN_FASTOPEN_BASE" = 6, is not prohibited by, and in the allowed range of the RFC 8684. As you suggested I will document the size of the cookie, if there is no objections to above information :) Dmytro. On 17/01/2022 10:03, Paolo Abeni wrote: > On Mon, 2022-01-17 at 10:58 +0100, Paolo Abeni wrote: >>> diff --git a/include/linux/tcp.h b/include/linux/tcp.h >>> index 48d8a363319e..d7092234d442 100644 >>> --- a/include/linux/tcp.h >>> +++ b/include/linux/tcp.h >>> @@ -54,7 +54,14 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) >>> /* TCP Fast Open */ >>> #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ >>> #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ >>> + >>> +#if IS_ENABLED(CONFIG_MPTCP) >>> +#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by MPTCP impl. */ >>> +#else >>> #define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */ >>> +#endif >> >> You really need to document this kind of changes in the code, possibly >> referring to the RFC bits requiring it. > Note that MPC + syn currently uses 24 bytes of TCP option spaces, and > MPC + syn + ack is using 32 bytes, to there is enough room for 8 bytes > cookies. > > /P >
On Mon, 2022-01-17 at 21:48 +0000, Dmytro SHYTYI wrote: > Hi Paolo, > > AFAIK, TFO option requires more that 8 bytes, if cookie len is 8 bytes: > > In file /net/ipv4/tcp_output.c (line 689): > > static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, > struct tcp_out_options *opts) > .... > { > if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { > struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; > u8 *p = (u8 *)ptr; > u32 len; /* Fast Open option length */ > > if (foc->exp) { > len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; > *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) | > TCPOPT_FASTOPEN_MAGIC); > p += TCPOLEN_EXP_FASTOPEN_BASE; > } else { > len = TCPOLEN_FASTOPEN_BASE + foc->len; > *p++ = TCPOPT_FASTOPEN; > *p++ = len; > } > > memcpy(p, foc->val, foc->len); > if ((len & 3) == 2) { > p[foc->len] = TCPOPT_NOP; > p[foc->len + 1] = TCPOPT_NOP; > } > ptr += (len + 3) >> 2; > } > ... > > Here we do have "len = TCPOLEN_FASTOPEN_BASE + foc->len" that is > slightly more than 8 bytes. > > This is the cause of introducing 4 byte cookie for MPTCP implementation. > > The RFC 8684 B.1. says: > This is done with the TCP cookie request/response > options, of 2 bytes and 6-18 bytes, respectively (depending on the > chosen cookie length). > > So in my understanding, the "selected cookie size + > TCPOLEN_FASTOPEN_BASE" = 6, is not prohibited by, and in the allowed > range of the RFC 8684. You are right, I underlooked the TFO option writing. > As you suggested I will document the size of the cookie, if there is no > objections to above information :) Shrinking the TFO cookie size has both security and functional impact: - we do not want to change the TFO cookie size for TCP connection in MPTCP enabled build. - dynamically selecting the TFO cookie size depending on the specific rsk type will add more complexity to the TCP code. rfc8684 suggests we could remove the TS option from the synack packet. I think it's worthy to try such idea. To do that, we need to change the 'mptcp_synack_options()' hook signature, to accept a 'struct tcp_out_options' as the 3rd argument, so that mptcp_synack_options() can clear the TCPOLEN_TSTAMP_ALIGNED bit when needed. Thanks Paolo
Hello Paolo, On 18/01/2022 11:02, Paolo Abeni wrote: > On Mon, 2022-01-17 at 21:48 +0000, Dmytro SHYTYI wrote: >> Hi Paolo, >> >> AFAIK, TFO option requires more that 8 bytes, if cookie len is 8 bytes: >> >> In file /net/ipv4/tcp_output.c (line 689): >> >> static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, >> struct tcp_out_options *opts) >> .... >> { >> if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { >> struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; >> u8 *p = (u8 *)ptr; >> u32 len; /* Fast Open option length */ >> >> if (foc->exp) { >> len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; >> *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) | >> TCPOPT_FASTOPEN_MAGIC); >> p += TCPOLEN_EXP_FASTOPEN_BASE; >> } else { >> len = TCPOLEN_FASTOPEN_BASE + foc->len; >> *p++ = TCPOPT_FASTOPEN; >> *p++ = len; >> } >> >> memcpy(p, foc->val, foc->len); >> if ((len & 3) == 2) { >> p[foc->len] = TCPOPT_NOP; >> p[foc->len + 1] = TCPOPT_NOP; >> } >> ptr += (len + 3) >> 2; >> } >> ... >> >> Here we do have "len = TCPOLEN_FASTOPEN_BASE + foc->len" that is >> slightly more than 8 bytes. >> >> This is the cause of introducing 4 byte cookie for MPTCP implementation. >> >> The RFC 8684 B.1. says: >> This is done with the TCP cookie request/response >> options, of 2 bytes and 6-18 bytes, respectively (depending on the >> chosen cookie length). >> >> So in my understanding, the "selected cookie size + >> TCPOLEN_FASTOPEN_BASE" = 6, is not prohibited by, and in the allowed >> range of the RFC 8684. > > You are right, I underlooked the TFO option writing. > >> As you suggested I will document the size of the cookie, if there is no >> objections to above information :) > > Shrinking the TFO cookie size has both security and functional impact: > > - we do not want to change the TFO cookie size for TCP connection in > MPTCP enabled build. > - dynamically selecting the TFO cookie size depending on the specific > rsk type will add more complexity to the TCP code. I agree with functional and future security impact in case if we reduce the cookie size. > rfc8684 suggests we could remove the TS option from the synack packet. > I think it's worthy to try such idea. > > To do that, we need to change the 'mptcp_synack_options()' hook > signature, to accept a 'struct tcp_out_options' as the 3rd argument, so > that mptcp_synack_options() can clear the TCPOLEN_TSTAMP_ALIGNED bit > when needed. With this we can try to keep "cookie len == 8". It will simplify things. I will look into it. > Thanks > > Paolo > Thanks, Dmytro.
Hello Paolo, On 19/01/2022 17:35, Dmytro SHYTYI wrote: > Hello Paolo, > > On 18/01/2022 11:02, Paolo Abeni wrote: >> On Mon, 2022-01-17 at 21:48 +0000, Dmytro SHYTYI wrote: >>> Hi Paolo, >>> >>> AFAIK, TFO option requires more that 8 bytes, if cookie len is 8 bytes: >>> >>> In file /net/ipv4/tcp_output.c (line 689): >>> >>> static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, >>> struct tcp_out_options *opts) >>> .... >>> { >>> if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { >>> struct tcp_fastopen_cookie *foc = >>> opts->fastopen_cookie; >>> u8 *p = (u8 *)ptr; >>> u32 len; /* Fast Open option length */ >>> >>> if (foc->exp) { >>> len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; >>> *ptr = htonl((TCPOPT_EXP << 24) | (len << >>> 16) | >>> TCPOPT_FASTOPEN_MAGIC); >>> p += TCPOLEN_EXP_FASTOPEN_BASE; >>> } else { >>> len = TCPOLEN_FASTOPEN_BASE + foc->len; >>> *p++ = TCPOPT_FASTOPEN; >>> *p++ = len; >>> } >>> >>> memcpy(p, foc->val, foc->len); >>> if ((len & 3) == 2) { >>> p[foc->len] = TCPOPT_NOP; >>> p[foc->len + 1] = TCPOPT_NOP; >>> } >>> ptr += (len + 3) >> 2; >>> } >>> ... >>> >>> Here we do have "len = TCPOLEN_FASTOPEN_BASE + foc->len" that is >>> slightly more than 8 bytes. >>> >>> This is the cause of introducing 4 byte cookie for MPTCP implementation. >>> >>> The RFC 8684 B.1. says: >>> This is done with the TCP cookie request/response >>> options, of 2 bytes and 6-18 bytes, respectively (depending on the >>> chosen cookie length). >>> >>> So in my understanding, the "selected cookie size + >>> TCPOLEN_FASTOPEN_BASE" = 6, is not prohibited by, and in the allowed >>> range of the RFC 8684. >> >> You are right, I underlooked the TFO option writing. >> >>> As you suggested I will document the size of the cookie, if there is no >>> objections to above information :) >> >> Shrinking the TFO cookie size has both security and functional impact: >> >> - we do not want to change the TFO cookie size for TCP connection in >> MPTCP enabled build. >> - dynamically selecting the TFO cookie size depending on the specific >> rsk type will add more complexity to the TCP code. > > I agree with functional and future security impact in case if we reduce > the cookie size. > >> rfc8684 suggests we could remove the TS option from the synack packet. >> I think it's worthy to try such idea. >> >> To do that, we need to change the 'mptcp_synack_options()' hook >> signature, to accept a 'struct tcp_out_options' as the 3rd argument, so >> that mptcp_synack_options() can clear the TCPOLEN_TSTAMP_ALIGNED bit >> when needed. > > With this we can try to keep "cookie len == 8". It will simplify things. > I will look into it. Thanks Paolo! It looks like it works in my local setup with "MPTCP" and "cookie len == 8", while changing the function signature to: bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, struct mptcp_out_options *opts, u16 *tcp_options) and further, edit tcp_options with: *tcp_options ^= OPTION_TS; Also I needed to do something like this in func "mptcp_synack_options": *size = TCPOLEN_MPTCP_MPC_SYNACK - TCPOLEN_TSTAMP_ALIGNED + TCPOLEN_SACKPERM_ALIGNED; This affected (excluded) some parts of my code that not anymore have impact on the TCP implementation. Result: tcpdump with 8 byte cookie len + mptcp option without TSvals in SYNACK: >> cookie req: 2022-01-20 22:10:04.275317 IP 10.0.0.1.49572 > 10.0.0.2.7003: Flags [S], seq 4140071331, win 64240, options [mss 1460,sackOK,TS val 608747398 ecr 0,nop,wscale 7,tfo cookiereq,nop,nop,mptcp capable[bad opt]> 2022-01-20 22:10:04.275353 IP 10.0.0.2.7003 > 10.0.0.1.49572: Flags >> cookie offer "0696fccb1b35cc39" + mptcp capable: [S.], seq 2992182232, ack 4140071332, win 65160, options [mss 1460,nop,nop,sackOK,nop,wscale 7,tfo cookie 0696fccb1b35cc39,nop,nop,mptcp capable Unknown Version (1)], length 0 2022-01-20 22:10:04.275406 IP 10.0.0.1.49572 > 10.0.0.2.7003: Flags [.], ack 1, win 502, options [mptcp capable Unknown Version (1)], length 0 2022-01-20 22:10:04.275535 IP 10.0.0.1.49572 > 10.0.0.2.7003: Flags [P.], seq 1:4, ack 1, win 502, options [mptcp capable[bad opt] 2022-01-20 22:10:04.275597 IP 10.0.0.2.7003 > 10.0.0.1.49572: Flags [.], ack 4, win 510, options [nop,nop,TS val 458375399 ecr 608747398,mptcp dss ack 17405876484214860771], length 0 2022-01-20 22:10:04.275745 IP 10.0.0.1.49572 > 10.0.0.2.7003: Flags [P.], seq 4:7, ack 1, win 502, options [mptcp dss ack 3584389301 seq 17405876484214860771 subseq 4 len 3,nop,nop], length 3 2022-01-20 22:10:04.275850 IP 10.0.0.2.7003 > 10.0.0.1.49572: Flags [.], ack 7, win 510, options [nop,nop,TS val 458375399 ecr 608747398,mptcp dss ack 17405876484214860774], length 0 2022-01-20 22:10:04.275967 IP 10.0.0.1.49572 > 10.0.0.2.7003: Flags [P.], seq 7:10, ack 1, win 502, options [mptcp dss ack 3584389301 seq 17405876484214860774 subseq 7 len 3,nop,nop], length 3 2022-01-20 22:10:04.275977 IP 10.0.0.2.7003 > 10.0.0.1.49572: Flags [.], ack 10, win 510, options [nop,nop,TS val 458375399 ecr 608747398,mptcp dss ack 17405876484214860777], length 0 2022-01-20 22:10:04.276073 IP 10.0.0.1.49572 > 10.0.0.2.7003: Flags [P.], seq 10:13, ack 1, win 502, options [mptcp dss ack 3584389301 seq 17405876484214860777 subseq 10 len 3,nop,nop], length 3 2022-01-20 22:10:04.276133 IP 10.0.0.2.7003 > 10.0.0.1.49572: Flags [.], ack 13, win 510, options [nop,nop,TS val 458375399 ecr 608747398,mptcp dss ack 17405876484214860780], length 0 2022-01-20 22:10:04.276156 IP 10.0.0.1.49572 > 10.0.0.2.7003: Flags [P.], seq 13:16, ack 1, win 502, options [mptcp dss ack 3584389301 seq 17405876484214860780 subseq 13 len 3,nop,nop], length 3 2022-01-20 22:10:04.276161 IP 10.0.0.2.7003 > 10.0.0.1.49572: Flags [.], ack 16, win 510, options [nop,nop,TS val 458375399 ecr 608747398,mptcp dss ack 17405876484214860783], length 0 2022-01-20 22:10:05.902411 IP 10.0.0.1.49574 > 10.0.0.2.7003: Flags [S], seq 3696317251:3696317254, win 64240, options [mss 1460,sackOK,TS val 608749026 ecr 0,nop,wscale 7,tfo cookie 0696fccb1b35cc39,nop,nop,mptcp capable[bad opt]> 2022-01-20 22:10:05.902456 IP 10.0.0.2.7003 > 10.0.0.1.49574: Flags [S.], seq 3169634712, ack 3696317255, win 65160, options [mss 1460,nop,nop,sackOK,nop,wscale 7,mptcp capable Unknown Version (1)], length 0 2022-01-20 22:10:05.902496 IP 10.0.0.1.49574 > 10.0.0.2.7003: Flags [.], ack 1, win 502, options [mptcp capable Unknown Version (1)], length 0 2022-01-20 22:10:05.902559 IP 10.0.0.1.49574 > 10.0.0.2.7003: Flags [P.], seq 1:4, ack 1, win 502, options [mptcp capable[bad opt] 2022-01-20 22:10:05.902597 IP 10.0.0.2.7003 > 10.0.0.1.49574: Flags [.], ack 4, win 510, options [nop,nop,TS val 458377026 ecr 608749026,mptcp dss ack 1878450330782657922], length 0 2022-01-20 22:10:05.902620 IP 10.0.0.1.49574 > 10.0.0.2.7003: Flags [P.], seq 4:7, ack 1, win 502, options [mptcp dss ack 414760887 seq 2527731727539835193 subseq 4 len 3,nop,nop], length 3 2022-01-20 22:10:05.902638 IP 10.0.0.2.7003 > 10.0.0.1.49574: Flags [.], ack 7, win 510, options [nop,nop,TS val 458377026 ecr 608749026,mptcp dss ack 1878450330782657925], length 0 2022-01-20 22:10:05.902644 IP 10.0.0.1.49574 > 10.0.0.2.7003: Flags [P.], seq 7:10, ack 1, win 502, options [mptcp dss ack 414760887 seq 2527731727539835196 subseq 7 len 3,nop,nop], length 3 2022-01-20 22:10:05.902647 IP 10.0.0.2.7003 > 10.0.0.1.49574: Flags [.], ack 10, win 510, options [nop,nop,TS val 458377026 ecr 608749026,mptcp dss ack 1878450330782657928], length 0 2022-01-20 22:10:05.902653 IP 10.0.0.1.49574 > 10.0.0.2.7003: Flags [P.], seq 10:13, ack 1, win 502, options [mptcp dss ack 414760887 seq 2527731727539835199 subseq 10 len 3,nop,nop], length 3 2022-01-20 22:10:05.902680 IP 10.0.0.2.7003 > 10.0.0.1.49574: Flags [.], ack 13, win 510, options [nop,nop,TS val 458377026 ecr 608749026,mptcp dss ack 1878450330782657931], length 0 Dmytro. >> Thanks >> >> Paolo >> > > Thanks, > Dmytro.
Hello Paolo, On 17/01/2022 09:58, Paolo Abeni wrote: > Hello, > > On Sun, 2022-01-16 at 00:12 +0000, Dmytro SHYTYI wrote: >> This set of patches will bring "Fast Open" Option support to MPTCP. >> The aim of Fast Open Mechanism is to eliminate one round trip >> time from a TCP conversation by allowing data to be included as >> part of the SYN segment that initiates the connection. >> >> IETF RFC 8684: Appendix B. TCP Fast Open and MPTCP. >> >> [PATCH v2] includes "client-server" partial support for : >> 1. MPTCP cookie request from client. >> 2. MPTCP cookie offering from server. >> 3. MPTCP SYN+DATA+COOKIE from client. >> 4. subsequent write + read on the opened socket. >> >> This patch is Work In Progress and an early draft shared due community >> request. > > It looks like this is more far from an RFC status than I thought. I agree. > There are a number of tasks to be completed for that: > - do not comment out existing working code, expecially TCP one. > - split the patch in several small one, clearly documenting in the > individual commit message what each patch is doing and why. > A possible skeleton could be: > * create the needed hooks in the TCP code, providing a dummy > implementation. Each hook will need a solid justification, > and they should not affect the TCP critical path. > The number of added hooks should be as low as possible. > You should try to use the existing hooking whenever it should be > possible. > No patch should touch the TCP code after the hooks are created. > Side note: I'm unsure if any additional hooks are really needed. > * add the real MPTCP impl, possibly with separate patches for > the client and the server side > * add self-tests > - avoid introducing white-space only changes. Thank you for sharing this. I'm going to take this into account. I might participate in the next MPTCP meeting to clarify some points. > There are a bunch of specific inline notes below, which are still very > important. Thanks! >> Signed-off-by: Dmytro SHYTYI <dmytro@shytyi.net> >> --- >> include/linux/tcp.h | 7 ++++ >> net/ipv4/inet_connection_sock.c | 3 +- >> net/ipv4/tcp_fastopen.c | 42 +++++++++++++++++++---- >> net/ipv4/tcp_input.c | 16 +++++---- >> net/mptcp/protocol.c | 59 ++++++++++++++++++++++++++++++--- >> net/mptcp/sockopt.c | 40 ++++++++++++++++++++++ >> net/mptcp/subflow.c | 14 ++++++++ >> 7 files changed, 162 insertions(+), 19 deletions(-) >> >> >> /* TCP Fast Open Cookie as stored in memory */ >> struct tcp_fastopen_cookie { >> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c >> index f7fea3a7c5e6..4b4159c0258d 100644 >> --- a/net/ipv4/inet_connection_sock.c >> +++ b/net/ipv4/inet_connection_sock.c >> @@ -501,7 +501,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) >> req = reqsk_queue_remove(queue, sk); >> newsk = req->sk; >> >> - if (sk->sk_protocol == IPPROTO_TCP && >> + if ((sk->sk_protocol == IPPROTO_TCP || >> + sk->sk_protocol == IPPROTO_MPTCP) && >> tcp_rsk(req)->tfo_listener) { >> spin_lock_bh(&queue->fastopenq.lock); >> if (tcp_rsk(req)->tfo_listener) { > > This chunk is really not needed. mptcp invokes inet_csk_accept(), > passing the initial subflow as the first argument. That socket is > really a TCP one. I agree. > Cheers, > > Paolo > Best regards, Dmytro
Hi Dmytro, url: https://github.com/0day-ci/linux/commits/Dmytro-SHYTYI/net-mptcp-Fast-Open-Mechanism/20220116-081430 base: df0cc57e057f18e44dac8e6c18aba47ab53202f9 config: x86_64-randconfig-m001 (https://download.01.org/0day-ci/archive/20220117/202201170247.BMTU5XYy-lkp@intel.com/config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> Reported-by: Dan Carpenter <dan.carpenter@oracle.com> smatch warnings: net/ipv4/tcp_input.c:6960 tcp_conn_request() warn: ignoring unreachable code. vim +6960 net/ipv4/tcp_input.c 9caad864151e52 Eric Dumazet 2016-04-01 6951 if (want_cookie) { 9caad864151e52 Eric Dumazet 2016-04-01 6952 reqsk_free(req); 9caad864151e52 Eric Dumazet 2016-04-01 6953 return 0; 9caad864151e52 Eric Dumazet 2016-04-01 6954 } 1fb6f159fd21c6 Octavian Purdila 2014-06-25 6955 } ca6fb06518836e Eric Dumazet 2015-10-02 6956 reqsk_put(req); 1fb6f159fd21c6 Octavian Purdila 2014-06-25 6957 return 0; ^^^^^^^^ 1fb6f159fd21c6 Octavian Purdila 2014-06-25 6958 52c7bf82e2e91e Dmytro SHYTYI 2022-01-16 6959 //drop_and_release: 1fb6f159fd21c6 Octavian Purdila 2014-06-25 @6960 dst_release(dst); ^^^^^^^^^^^^^^^^ Unreachable code. --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Dan, On 18/01/2022 14:28, Dan Carpenter wrote: > Hi Dmytro, > > url: https://github.com/0day-ci/linux/commits/Dmytro-SHYTYI/net-mptcp-Fast-Open-Mechanism/20220116-081430 > base: df0cc57e057f18e44dac8e6c18aba47ab53202f9 > config: x86_64-randconfig-m001 (https://download.01.org/0day-ci/archive/20220117/202201170247.BMTU5XYy-lkp@intel.com/config) > compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 > > If you fix the issue, kindly add following tag as appropriate > Reported-by: kernel test robot <lkp@intel.com> > Reported-by: Dan Carpenter <dan.carpenter@oracle.com> > > smatch warnings: > net/ipv4/tcp_input.c:6960 tcp_conn_request() warn: ignoring unreachable code. > > vim +6960 net/ipv4/tcp_input.c > > 9caad864151e52 Eric Dumazet 2016-04-01 6951 if (want_cookie) { > 9caad864151e52 Eric Dumazet 2016-04-01 6952 reqsk_free(req); > 9caad864151e52 Eric Dumazet 2016-04-01 6953 return 0; > 9caad864151e52 Eric Dumazet 2016-04-01 6954 } > 1fb6f159fd21c6 Octavian Purdila 2014-06-25 6955 } > ca6fb06518836e Eric Dumazet 2015-10-02 6956 reqsk_put(req); > 1fb6f159fd21c6 Octavian Purdila 2014-06-25 6957 return 0; > ^^^^^^^^ > > 1fb6f159fd21c6 Octavian Purdila 2014-06-25 6958 > 52c7bf82e2e91e Dmytro SHYTYI 2022-01-16 6959 //drop_and_release: > 1fb6f159fd21c6 Octavian Purdila 2014-06-25 @6960 dst_release(dst); > ^^^^^^^^^^^^^^^^ > Unreachable code. Indeed. It is going to be fixed in next version of PATCH. Thanks. > --- > 0-DAY CI Kernel Test Service, Intel Corporation > https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org >
© 2016 - 2024 Red Hat, Inc.