From: Geliang Tang <geliangtang@xiaomi.com>
This patch added a "noncontiguous" flag in the msk to track whether the
data is contiguous on a subflow. When retransmission happens, we could
set this flag, and once all retransmissions are DATA_ACK'd that flag
could be cleared.
When a bad checksum is detected and a single contiguous subflow is in
use, don't send RST + MP_FAIL, send data_ack + MP_FAIL instead.
Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
---
net/mptcp/protocol.c | 7 +++++++
net/mptcp/protocol.h | 2 ++
net/mptcp/subflow.c | 12 ++++++------
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index bb8a2a231479..81ea03b9fff6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1095,6 +1095,9 @@ static void __mptcp_clean_una(struct sock *sk)
dfrag_uncharge(sk, delta);
cleaned = true;
+
+ if (dfrag->resend_count == 0)
+ WRITE_ONCE(msk->noncontiguous, false);
}
/* all retransmitted data acked, recovery completed */
@@ -1171,6 +1174,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
dfrag->offset = offset + sizeof(struct mptcp_data_frag);
dfrag->already_sent = 0;
+ dfrag->resend_count = 0;
dfrag->page = pfrag->page;
return dfrag;
@@ -2454,6 +2458,8 @@ static void __mptcp_retrans(struct sock *sk)
dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ dfrag->resend_count++;
+ WRITE_ONCE(msk->noncontiguous, true);
}
release_sock(ssk);
@@ -2872,6 +2878,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
+ WRITE_ONCE(msk->noncontiguous, false);
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d3e6fd1615f1..011f84ae1593 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -213,6 +213,7 @@ struct mptcp_data_frag {
u16 offset;
u16 overhead;
u16 already_sent;
+ u16 resend_count;
struct page *page;
};
@@ -249,6 +250,7 @@ struct mptcp_sock {
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
+ bool noncontiguous;
spinlock_t join_list_lock;
struct work_struct work;
struct sk_buff *ooo_last_skb;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1de7ce883c37..951aafb6021e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1166,15 +1166,15 @@ static bool subflow_check_data_avail(struct sock *ssk)
fallback:
/* RFC 8684 section 3.7. */
if (subflow->send_mp_fail) {
- if (mptcp_has_another_subflow(ssk)) {
+ if (mptcp_has_another_subflow(ssk) || READ_ONCE(msk->noncontiguous)) {
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
}
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, 0);
return true;
}
--
2.31.1
On Thu, 2021-09-09 at 19:51 +0800, Geliang Tang wrote: > From: Geliang Tang <geliangtang@xiaomi.com> > > This patch added a "noncontiguous" flag in the msk to track whether the > data is contiguous on a subflow. When retransmission happens, we could > set this flag, and once all retransmissions are DATA_ACK'd that flag > could be cleared. > > When a bad checksum is detected and a single contiguous subflow is in > use, don't send RST + MP_FAIL, send data_ack + MP_FAIL instead. > > Signed-off-by: Geliang Tang <geliangtang@xiaomi.com> > --- > net/mptcp/protocol.c | 7 +++++++ > net/mptcp/protocol.h | 2 ++ > net/mptcp/subflow.c | 12 ++++++------ > 3 files changed, 15 insertions(+), 6 deletions(-) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index bb8a2a231479..81ea03b9fff6 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -1095,6 +1095,9 @@ static void __mptcp_clean_una(struct sock *sk) > > dfrag_uncharge(sk, delta); > cleaned = true; > + > + if (dfrag->resend_count == 0) > + WRITE_ONCE(msk->noncontiguous, false); > } > > /* all retransmitted data acked, recovery completed */ > @@ -1171,6 +1174,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag, > dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag); > dfrag->offset = offset + sizeof(struct mptcp_data_frag); > dfrag->already_sent = 0; > + dfrag->resend_count = 0; > dfrag->page = pfrag->page; > > return dfrag; > @@ -2454,6 +2458,8 @@ static void __mptcp_retrans(struct sock *sk) > dfrag->already_sent = max(dfrag->already_sent, info.sent); > tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, > info.size_goal); > + dfrag->resend_count++; > + WRITE_ONCE(msk->noncontiguous, true); > } > > release_sock(ssk); > @@ -2872,6 +2878,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, > WRITE_ONCE(msk->fully_established, false); > if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) > WRITE_ONCE(msk->csum_enabled, true); > + WRITE_ONCE(msk->noncontiguous, false); > > msk->write_seq = subflow_req->idsn + 1; > msk->snd_nxt = msk->write_seq; > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > index d3e6fd1615f1..011f84ae1593 100644 > --- a/net/mptcp/protocol.h > +++ b/net/mptcp/protocol.h > @@ -213,6 +213,7 @@ struct mptcp_data_frag { > u16 offset; > u16 overhead; > u16 already_sent; > + u16 resend_count; > struct page *page; > }; Ouch, the above increases mptcp_data_frag size by 8 bytes, due to holes. Is this necessary? I think the packet scheduler never reinject with a single subflow. It used to do that, but it should not do anymore. Even if the scheduler re-inject with a single subflow, can we instead keep track of the last retrans sequence number - in __mptcp_retrans(). msk stream is 'continuous' if and only if 'before64(last_retrnas_seq, msk->snd_una)' /P
On Thu, 9 Sep 2021, Paolo Abeni wrote: > On Thu, 2021-09-09 at 19:51 +0800, Geliang Tang wrote: >> From: Geliang Tang <geliangtang@xiaomi.com> >> >> This patch added a "noncontiguous" flag in the msk to track whether the >> data is contiguous on a subflow. When retransmission happens, we could >> set this flag, and once all retransmissions are DATA_ACK'd that flag >> could be cleared. >> >> When a bad checksum is detected and a single contiguous subflow is in >> use, don't send RST + MP_FAIL, send data_ack + MP_FAIL instead. >> >> Signed-off-by: Geliang Tang <geliangtang@xiaomi.com> >> --- >> net/mptcp/protocol.c | 7 +++++++ >> net/mptcp/protocol.h | 2 ++ >> net/mptcp/subflow.c | 12 ++++++------ >> 3 files changed, 15 insertions(+), 6 deletions(-) >> >> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c >> index bb8a2a231479..81ea03b9fff6 100644 >> --- a/net/mptcp/protocol.c >> +++ b/net/mptcp/protocol.c >> @@ -1095,6 +1095,9 @@ static void __mptcp_clean_una(struct sock *sk) >> >> dfrag_uncharge(sk, delta); >> cleaned = true; >> + >> + if (dfrag->resend_count == 0) >> + WRITE_ONCE(msk->noncontiguous, false); >> } >> >> /* all retransmitted data acked, recovery completed */ >> @@ -1171,6 +1174,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag, >> dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag); >> dfrag->offset = offset + sizeof(struct mptcp_data_frag); >> dfrag->already_sent = 0; >> + dfrag->resend_count = 0; >> dfrag->page = pfrag->page; >> >> return dfrag; >> @@ -2454,6 +2458,8 @@ static void __mptcp_retrans(struct sock *sk) >> dfrag->already_sent = max(dfrag->already_sent, info.sent); >> tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, >> info.size_goal); >> + dfrag->resend_count++; >> + WRITE_ONCE(msk->noncontiguous, true); >> } >> >> release_sock(ssk); >> @@ -2872,6 +2878,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, >> WRITE_ONCE(msk->fully_established, false); >> if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) >> WRITE_ONCE(msk->csum_enabled, true); >> + WRITE_ONCE(msk->noncontiguous, false); >> >> msk->write_seq = subflow_req->idsn + 1; >> msk->snd_nxt = msk->write_seq; >> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h >> index d3e6fd1615f1..011f84ae1593 100644 >> --- a/net/mptcp/protocol.h >> +++ b/net/mptcp/protocol.h >> @@ -213,6 +213,7 @@ struct mptcp_data_frag { >> u16 offset; >> u16 overhead; >> u16 already_sent; >> + u16 resend_count; >> struct page *page; >> }; > > Ouch, the above increases mptcp_data_frag size by 8 bytes, due to > holes. > What about rearranging the struct to eliminate the holes? (Full disclosure: I asked Geliang to add this, but am open to other solutions) I was also thinking it could be useful to have this information around for retransmission metrics, but that doesn't seem too important. > Is this necessary? I think the packet scheduler never reinject with a > single subflow. It used to do that, but it should not do anymore. > There may be a single subflow now, but multiple subflows (with unacked reinjections) a very short time ago. > Even if the scheduler re-inject with a single subflow, can we instead > keep track of the last retrans sequence number - in __mptcp_retrans(). > > msk stream is 'continuous' if and only if 'before64(last_retrnas_seq, > msk->snd_una)' > If last_retrans_seq is checked only when msk->noncontiguous is true, I think that works out. Otherwise the last_retrans_seq is stale/invalid if retransmission never happened, or any retransmissions have been fully acked. -- Mat Martineau Intel
On Thu, 2021-09-09 at 17:00 -0700, Mat Martineau wrote: > > On Thu, 9 Sep 2021, Paolo Abeni wrote: > > msk stream is 'continuous' if and only if 'before64(last_retrnas_seq, > > msk->snd_una)' > > > > If last_retrans_seq is checked only when msk->noncontiguous is true, I > think that works out. Otherwise the last_retrans_seq is stale/invalid if > retransmission never happened, or any retransmissions have been fully > acked. I think can we just init 'last_retrans_seq' to intial_seq - 1 at msk creation time and we could always use the above check: if (before64(last_retrnas_seq, msk->snd_una)) WDYT? Cheers, Paolo
On Fri, 10 Sep 2021, Paolo Abeni wrote: > On Thu, 2021-09-09 at 17:00 -0700, Mat Martineau wrote: >>> On Thu, 9 Sep 2021, Paolo Abeni wrote: >>> msk stream is 'continuous' if and only if 'before64(last_retrnas_seq, >>> msk->snd_una)' >>> >> >> If last_retrans_seq is checked only when msk->noncontiguous is true, I >> think that works out. Otherwise the last_retrans_seq is stale/invalid if >> retransmission never happened, or any retransmissions have been fully >> acked. > > I think can we just init 'last_retrans_seq' to intial_seq - 1 at msk > creation time and we could always use the above check: > > if (before64(last_retrnas_seq, msk->snd_una)) > > WDYT? > Ah, ok. Initialize it like that, and then keep moving last_retrans_seq ahead when updating snd_una and the MPTCP stream *is* contiguous, so we don't run in to problems when snd_una wraps around relative to last_retrans_seq. And be sure to set last_retrans_seq to the sequence number for the end of the retransmitted data when entering recovery as well as in __mptcp_retrans(). -- Mat Martineau Intel
© 2016 - 2025 Red Hat, Inc.