net/mptcp/options.c | 16 +++++++++++++--- net/mptcp/subflow.c | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-)
Please update the commit log too:
'''
This patch adds the infinite mapping receiving logic.
In mptcp_incoming_options(), invoke the new helper function
mptcp_infinite_map_received() to check whether the infinite mapping
is received. If it is, set the infinite_map flag of struct mptcp_ext.
In get_mapping_status(), if the infinite mapping is received, set the
map_data_len of the subflow to 0.
In subflow_check_data_avail(), only reset the subflow when the map_data_len
of the subflow is non-zero.
'''
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/options.c | 16 +++++++++++++---
net/mptcp/subflow.c | 2 +-
2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index f4591421ed22..0144cc97a123 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1077,6 +1077,14 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
return hmac == mp_opt->ahmac;
}
+static bool mptcp_infinite_map_received(struct mptcp_options_received *mp_opt)
+{
+ if (mp_opt->use_map && !mp_opt->data_len)
+ return true;
+
+ return false;
+}
+
/* Return false if a subflow has been reset, else return true */
bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
{
@@ -1085,7 +1093,9 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
- if (__mptcp_check_fallback(msk)) {
+ mptcp_get_options(sk, skb, &mp_opt);
+
+ if (__mptcp_check_fallback(msk) && !mptcp_infinite_map_received(&mp_opt)) {
/* Keep it simple and unconditionally trigger send data cleanup and
* pending queue spooling. We will need to acquire the data lock
* for more accurate checks, and once the lock is acquired, such
@@ -1099,8 +1109,6 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
- mptcp_get_options(sk, skb, &mp_opt);
-
/* The subflow can be in close state only if check_fully_established()
* just sent a reset. If so, tell the caller to ignore the current packet.
*/
@@ -1202,6 +1210,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (mpext->csum_reqd)
mpext->csum = mp_opt.csum;
+ if (mptcp_infinite_map_received(&mp_opt))
+ mpext->infinite_map = 1;
}
return true;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9e54122f18f4..bb7d8685ffd4 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -938,7 +938,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
if (!skb)
return MAPPING_EMPTY;
- if (mptcp_check_fallback(ssk))
+ if (mptcp_check_fallback(ssk) && !mptcp_check_infinite_map(skb))
return MAPPING_DUMMY;
mpext = mptcp_get_ext(skb);
--
2.26.2
Hi Geliang, Thank you for the patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v5.15-rc6 next-20211021] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Geliang-Tang/Squash-to-mptcp-infinite-mapping-receiving/20211021-140855 base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2f111a6fd5b5297b4e92f53798ca086f7c7d33a4 config: hexagon-randconfig-r045-20211021 (attached as .config) compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 3cea2505fd8d99a9ba0cb625aecfe28a47c4e3f8) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/ca85326898507008f999c544140bfcecb30316b3 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Geliang-Tang/Squash-to-mptcp-infinite-mapping-receiving/20211021-140855 git checkout ca85326898507008f999c544140bfcecb30316b3 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=hexagon If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): >> net/mptcp/subflow.c:941:36: error: implicit declaration of function 'mptcp_check_infinite_map' [-Werror,-Wimplicit-function-declaration] if (mptcp_check_fallback(ssk) && !mptcp_check_infinite_map(skb)) ^ 1 error generated. -- net/mptcp/options.c:566:21: warning: parameter 'remaining' set but not used [-Wunused-but-set-parameter] unsigned int remaining, ^ >> net/mptcp/options.c:1218:11: error: no member named 'infinite_map' in 'struct mptcp_ext' mpext->infinite_map = 1; ~~~~~ ^ 1 warning and 1 error generated. 
vim +/mptcp_check_infinite_map +941 net/mptcp/subflow.c 926 927 static enum mapping_status get_mapping_status(struct sock *ssk, 928 struct mptcp_sock *msk) 929 { 930 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 931 bool csum_reqd = READ_ONCE(msk->csum_enabled); 932 struct mptcp_ext *mpext; 933 struct sk_buff *skb; 934 u16 data_len; 935 u64 map_seq; 936 937 skb = skb_peek(&ssk->sk_receive_queue); 938 if (!skb) 939 return MAPPING_EMPTY; 940 > 941 if (mptcp_check_fallback(ssk) && !mptcp_check_infinite_map(skb)) 942 return MAPPING_DUMMY; 943 944 mpext = mptcp_get_ext(skb); 945 if (!mpext || !mpext->use_map) { 946 if (!subflow->map_valid && !skb->len) { 947 /* the TCP stack deliver 0 len FIN pkt to the receive 948 * queue, that is the only 0len pkts ever expected here, 949 * and we can admit no mapping only for 0 len pkts 950 */ 951 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 952 WARN_ONCE(1, "0len seq %d:%d flags %x", 953 TCP_SKB_CB(skb)->seq, 954 TCP_SKB_CB(skb)->end_seq, 955 TCP_SKB_CB(skb)->tcp_flags); 956 sk_eat_skb(ssk, skb); 957 return MAPPING_EMPTY; 958 } 959 960 if (!subflow->map_valid) 961 return MAPPING_INVALID; 962 963 goto validate_seq; 964 } 965 966 trace_get_mapping_status(mpext); 967 968 data_len = mpext->data_len; 969 if (data_len == 0) { 970 MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); 971 return MAPPING_INVALID; 972 } 973 974 if (mpext->data_fin == 1) { 975 if (data_len == 1) { 976 bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq, 977 mpext->dsn64); 978 pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq); 979 if (subflow->map_valid) { 980 /* A DATA_FIN might arrive in a DSS 981 * option before the previous mapping 982 * has been fully consumed. Continue 983 * handling the existing mapping. 
984 */ 985 skb_ext_del(skb, SKB_EXT_MPTCP); 986 return MAPPING_OK; 987 } else { 988 if (updated && schedule_work(&msk->work)) 989 sock_hold((struct sock *)msk); 990 991 return MAPPING_DATA_FIN; 992 } 993 } else { 994 u64 data_fin_seq = mpext->data_seq + data_len - 1; 995 996 /* If mpext->data_seq is a 32-bit value, data_fin_seq 997 * must also be limited to 32 bits. 998 */ 999 if (!mpext->dsn64) 1000 data_fin_seq &= GENMASK_ULL(31, 0); 1001 1002 mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); 1003 pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d", 1004 data_fin_seq, mpext->dsn64); 1005 } 1006 1007 /* Adjust for DATA_FIN using 1 byte of sequence space */ 1008 data_len--; 1009 } 1010 1011 map_seq = mptcp_expand_seq(READ_ONCE(msk->ack_seq), mpext->data_seq, mpext->dsn64); 1012 WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64); 1013 1014 if (subflow->map_valid) { 1015 /* Allow replacing only with an identical map */ 1016 if (subflow->map_seq == map_seq && 1017 subflow->map_subflow_seq == mpext->subflow_seq && 1018 subflow->map_data_len == data_len && 1019 subflow->map_csum_reqd == mpext->csum_reqd) { 1020 skb_ext_del(skb, SKB_EXT_MPTCP); 1021 goto validate_csum; 1022 } 1023 1024 /* If this skb data are fully covered by the current mapping, 1025 * the new map would need caching, which is not supported 1026 */ 1027 if (skb_is_fully_mapped(ssk, skb)) { 1028 MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH); 1029 return MAPPING_INVALID; 1030 } 1031 1032 /* will validate the next map after consuming the current one */ 1033 goto validate_csum; 1034 } 1035 1036 subflow->map_seq = map_seq; 1037 subflow->map_subflow_seq = mpext->subflow_seq; 1038 subflow->map_data_len = data_len; 1039 subflow->map_valid = 1; 1040 subflow->map_data_fin = mpext->data_fin; 1041 subflow->mpc_map = mpext->mpc_map; 1042 subflow->map_csum_reqd = mpext->csum_reqd; 1043 subflow->map_csum_len = 0; 1044 subflow->map_data_csum = csum_unfold(mpext->csum); 1045 1046 /* Cfr 
RFC 8684 Section 3.3.0 */ 1047 if (unlikely(subflow->map_csum_reqd != csum_reqd)) 1048 return MAPPING_INVALID; 1049 1050 pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", 1051 subflow->map_seq, subflow->map_subflow_seq, 1052 subflow->map_data_len, subflow->map_csum_reqd, 1053 subflow->map_data_csum); 1054 1055 validate_seq: 1056 /* we revalidate valid mapping on new skb, because we must ensure 1057 * the current skb is completely covered by the available mapping 1058 */ 1059 if (!validate_mapping(ssk, skb)) { 1060 MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH); 1061 return MAPPING_INVALID; 1062 } 1063 1064 skb_ext_del(skb, SKB_EXT_MPTCP); 1065 1066 validate_csum: 1067 return validate_data_csum(ssk, skb, csum_reqd); 1068 } 1069 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Thu, 21 Oct 2021, Geliang Tang wrote: > Please update the commit log too: > > ''' > This patch added the infinite mapping receiving logic. > > In mptcp_incoming_options(), invoke the new helper function > mptcp_infinite_map_received() to check whether the infinite mapping > is received. If it is, set the infinite_map flag of struct mptcp_ext. > > In get_mapping_status(), if the infinite mapping is received, set the > map_data_len of the subflow to 0. > > In subflow_check_data_avail(), only reset the subflow when the map_data_len > of the subflow is non-zero. > ''' > > Signed-off-by: Geliang Tang <geliang.tang@suse.com> > --- > net/mptcp/options.c | 16 +++++++++++++--- > net/mptcp/subflow.c | 2 +- > 2 files changed, 14 insertions(+), 4 deletions(-) > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > index f4591421ed22..0144cc97a123 100644 > --- a/net/mptcp/options.c > +++ b/net/mptcp/options.c > @@ -1077,6 +1077,14 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, > return hmac == mp_opt->ahmac; > } > > +static bool mptcp_infinite_map_received(struct mptcp_options_received *mp_opt) > +{ > + if (mp_opt->use_map && !mp_opt->data_len) > + return true; > + > + return false; > +} > + > /* Return false if a subflow has been reset, else return true */ > bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) > { > @@ -1085,7 +1093,9 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) > struct mptcp_options_received mp_opt; > struct mptcp_ext *mpext; > > - if (__mptcp_check_fallback(msk)) { > + mptcp_get_options(sk, skb, &mp_opt); Now this will scan through all TCP options on every packet after fallback, to catch a rare infinite mapping in a case where we are only looking for one after sending MP_FAIL. While it's not super expensive, it's still extra work that adds up over the life of a connection. More importantly, I think this is showing how infinite mapping fallback is probably more complicated than I thought. 
For "infinite mapping fallback", there seem to be two separate parts of fallback to keep track of: * Transmit fallback: This peer has sent an infinite mapping already (after receiving an MP_FAIL) and should not send any new MPTCP options. * Receive fallback: This peer has received an infinite mapping, and will ignore MPTCP options on all incoming packets. The RFC does say that a receiver of MP_FAIL must send an MP_FAIL so both directions fall back, but with packets in flight there's still some time where fallback has only happened in one direction: 1. Peer A sends MP_FAIL 2. Peer B receives MP_FAIL 3. Peer B sends MP_FAIL (on an ack or data packet) and infinite mapping (on next data packet) * Peer B enters "transmit fallback" at this time. 4. Peer A receives MP_FAIL, sends infinite mapping on next data packet. * Peer A enters "transmit fallback" when sending infinite mapping 5. Peer A receives infinite mapping * Peer A enters "receive fallback", fallback complete on this peer 6. Peer B receives infinite mapping * Peer B enters "receive fallback", fallback complete on this peer Steps 4 and 5 could be reversed, we can't assume which order Peer B will send the mapping and the MP_FAIL. Another layer of complexity is that MP_FAIL can be lost if it's not in a data packet, so we probably need to keep sending MP_FAIL until an infinite mapping is received (like multipath-tcp.org does). This also implies that the receive code should ignore repeated MP_FAILs. I also noticed that the multipath-tcp.org kernel does something interesting with connections that are entering fallback with MP_FAIL: packets that contain a regular mapping (not infinite) are dropped after MP_FAIL is sent but an infinite mapping has not been received yet. Look at mptcp_detect_mapping() and "Ignore packets which do not announce fallback" comment text in mptcp_input.c. 
I think the reasoning may be that the bad checksum that triggered the MP_FAIL has dropped some data, so to get the data stream back in a good state we need to wait for the infinite mapping as a signal that the other peer received the MP_FAIL and has resent data starting from the last DATA_ACKed byte. @Christoph, does that sound correct? ^^^^ 'git blame' said I should ask you :) > + > + if (__mptcp_check_fallback(msk) && !mptcp_infinite_map_received(&mp_opt)) { > /* Keep it simple and unconditionally trigger send data cleanup and > * pending queue spooling. We will need to acquire the data lock > * for more accurate checks, and once the lock is acquired, such > @@ -1099,8 +1109,6 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) > return true; > } > > - mptcp_get_options(sk, skb, &mp_opt); > - > /* The subflow can be in close state only if check_fully_established() > * just sent a reset. If so, tell the caller to ignore the current packet. > */ > @@ -1202,6 +1210,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) > > if (mpext->csum_reqd) > mpext->csum = mp_opt.csum; > + if (mptcp_infinite_map_received(&mp_opt)) > + mpext->infinite_map = 1; > } > > return true; > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c > index 9e54122f18f4..bb7d8685ffd4 100644 > --- a/net/mptcp/subflow.c > +++ b/net/mptcp/subflow.c > @@ -938,7 +938,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, > if (!skb) > return MAPPING_EMPTY; > > - if (mptcp_check_fallback(ssk)) > + if (mptcp_check_fallback(ssk) && !mptcp_check_infinite_map(skb)) > return MAPPING_DUMMY; > > mpext = mptcp_get_ext(skb); > -- > 2.26.2 -- Mat Martineau Intel
Hello, > On Oct 21, 2021, at 6:06 PM, Mat Martineau <mathew.j.martineau@linux.intel.com> wrote: > > On Thu, 21 Oct 2021, Geliang Tang wrote: > >> Please update the commit log too: >> >> ''' >> This patch added the infinite mapping receiving logic. >> >> In mptcp_incoming_options(), invoke the new helper function >> mptcp_infinite_map_received() to check whether the infinite mapping >> is received. If it is, set the infinite_map flag of struct mptcp_ext. >> >> In get_mapping_status(), if the infinite mapping is received, set the >> map_data_len of the subflow to 0. >> >> In subflow_check_data_avail(), only reset the subflow when the map_data_len >> of the subflow is non-zero. >> ''' >> >> Signed-off-by: Geliang Tang <geliang.tang@suse.com> >> --- >> net/mptcp/options.c | 16 +++++++++++++--- >> net/mptcp/subflow.c | 2 +- >> 2 files changed, 14 insertions(+), 4 deletions(-) >> >> diff --git a/net/mptcp/options.c b/net/mptcp/options.c >> index f4591421ed22..0144cc97a123 100644 >> --- a/net/mptcp/options.c >> +++ b/net/mptcp/options.c >> @@ -1077,6 +1077,14 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, >> return hmac == mp_opt->ahmac; >> } >> >> +static bool mptcp_infinite_map_received(struct mptcp_options_received *mp_opt) >> +{ >> + if (mp_opt->use_map && !mp_opt->data_len) >> + return true; >> + >> + return false; >> +} >> + >> /* Return false if a subflow has been reset, else return true */ >> bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) >> { >> @@ -1085,7 +1093,9 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) >> struct mptcp_options_received mp_opt; >> struct mptcp_ext *mpext; >> >> - if (__mptcp_check_fallback(msk)) { >> + mptcp_get_options(sk, skb, &mp_opt); > > Now this will scan through all TCP options on every packet after fallback, to catch a rare infinite mapping in a case where we are only looking for one after sending MP_FAIL. 
While it's not super expensive, it's still extra work that adds up over the life of a connection. More importantly, I think this is showing how infinite mapping fallback is probably more complicated than I thought. > > > For "infinite mapping fallback", there seem to be two separate parts of fallback to keep track of: > > * Transmit fallback: This peer has sent an infinite mapping already > (after receiving an MP_FAIL) and should not send any new MPTCP options. > > * Receive fallback: This peer has received an infinite mapping, and will > ignore MPTCP options on all incoming packets. > > The RFC does say that a receiver of MP_FAIL must send an MP_FAIL so both directions fall back, but with packets in flight there's still some time where fallback has only happened in one direction: > > 1. Peer A sends MP_FAIL > 2. Peer B receives MP_FAIL > 3. Peer B sends MP_FAIL (on an ack or data packet) and infinite mapping (on next data packet) > * Peer B enters "transmit fallback" at this time. > 4. Peer A receives MP_FAIL, sends infinite mapping on next data packet. > * Peer B enters "transmit fallback" when sending infinite mapping > 5. Peer A receives infinite mapping > * Peer A enters "receive fallback", fallback complete on this peer > 6. Peer B receives infinite mapping > * Peer B enters "receive fallback", fallback complete on this peer > > Steps 4 and 5 could be reversed, we can't assume which order Peer B will send the mapping and the MP_FAIL. > > Another layer of complexity is that MP_FAIL can be lost if it's not in a data packet, so we probably need to keep sending MP_FAIL until an infinite mapping is received (like multipath-tcp.org does). This also implies that the receive code should ignore repeated MP_FAILs. 
> > > > I also noticed that the multipath-tcp.org kernel does something interesting with connections that are entering fallback with MP_FAIL: packets that contain a regular mapping (not infinite) are dropped after MP_FAIL is sent but an infinite mapping has not been received yet. Look at mptcp_detect_mapping() and "Ignore packets which do not announce fallback" comment text in mptcp_input.c. > > I think the reasoning may be that the bad checksum that triggered the MP_FAIL has dropped some data, so to get the data stream back in a good state we need to wait for the infinite mapping as a signal that the other peer received the MP_FAIL and has resent data starting from the last DATA_ACKed byte. > > @Christoph, does that sound correct? ^^^^ > 'git blame' said I should ask you :) :) Yes, this sounds correct to me! In general, fallback to infinite mid-stream is really a best-effort thing. Don't spend CPU-cycles on that in the fast-path :) It happens only if middleboxes start interfering with the data-stream in a very disruptive way. It's ok to try to work-around these things but not at any cost. Better to kill the connection and fail hard. The fallback scenarios that are more common are the ones during connection establishment. Also, right after the TCP-handshake is a scenario that does happen. Christoph > >> + >> + if (__mptcp_check_fallback(msk) && !mptcp_infinite_map_received(&mp_opt)) { >> /* Keep it simple and unconditionally trigger send data cleanup and >> * pending queue spooling. We will need to acquire the data lock >> * for more accurate checks, and once the lock is acquired, such >> @@ -1099,8 +1109,6 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) >> return true; >> } >> >> - mptcp_get_options(sk, skb, &mp_opt); >> - >> /* The subflow can be in close state only if check_fully_established() >> * just sent a reset. If so, tell the caller to ignore the current packet. 
>> */ >> @@ -1202,6 +1210,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) >> >> if (mpext->csum_reqd) >> mpext->csum = mp_opt.csum; >> + if (mptcp_infinite_map_received(&mp_opt)) >> + mpext->infinite_map = 1; >> } >> >> return true; >> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c >> index 9e54122f18f4..bb7d8685ffd4 100644 >> --- a/net/mptcp/subflow.c >> +++ b/net/mptcp/subflow.c >> @@ -938,7 +938,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, >> if (!skb) >> return MAPPING_EMPTY; >> >> - if (mptcp_check_fallback(ssk)) >> + if (mptcp_check_fallback(ssk) && !mptcp_check_infinite_map(skb)) >> return MAPPING_DUMMY; >> >> mpext = mptcp_get_ext(skb); >> -- >> 2.26.2 > > -- > Mat Martineau > Intel
© 2016 - 2024 Red Hat, Inc.