MPTCP rewrites the TCP shadow receive window on subflows when shared
receive-window state changes.
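Once tp->rcv_wnd carries paired snapshot semantics, those subflow shadow
updates have to refresh the advertise-time scaling snapshot too. Convert
the MPTCP window-sync write sites in rwin_update() and mptcp_set_rwin()
to use tcp_set_rcv_wnd(); the rwin_update() resync now also clamps the
new value to U32_MAX. Move the aggregate receive-space arithmetic in
__mptcp_space() into an explicit rwnd-availability helper,
mptcp_rwnd_avail(), which reads receive memory via tcp_rmem_used()
instead of sk_rmem_alloc_get().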
Signed-off-by: Wesley Atwell <atwellwea@gmail.com>
---
net/mptcp/options.c | 12 ++++++++----
net/mptcp/protocol.h | 14 +++++++++++---
2 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 43df4293f58b..6e6aa084cbfa 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1073,9 +1073,12 @@ static void rwin_update(struct mptcp_sock *msk, struct sock *ssk,
return;
/* Some other subflow grew the mptcp-level rwin since rcv_wup,
- * resync.
+ * resync. Keep the TCP shadow window in its advertised u32 domain
+ * and refresh the advertise-time scaling snapshot while doing so.
*/
- tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent;
+ tcp_set_rcv_wnd(tp, min_t(u64, (u64)tp->rcv_wnd +
+ (mptcp_rcv_wnd - subflow->rcv_wnd_sent),
+ U32_MAX));
subflow->rcv_wnd_sent = mptcp_rcv_wnd;
}
@@ -1334,11 +1337,12 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
if (rcv_wnd_new != rcv_wnd_old) {
raise_win:
/* The msk-level rcv wnd is after the tcp level one,
- * sync the latter.
+ * sync the latter and refresh its advertise-time scaling
+ * snapshot.
*/
rcv_wnd_new = rcv_wnd_old;
win = rcv_wnd_old - ack_seq;
- tp->rcv_wnd = min_t(u64, win, U32_MAX);
+ tcp_set_rcv_wnd(tp, min_t(u64, win, U32_MAX));
new_win = tp->rcv_wnd;
/* Make sure we do not exceed the maximum possible
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0bd1ee860316..4ea95c9c0c7a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -408,11 +408,19 @@ static inline int mptcp_space_from_win(const struct sock *sk, int win)
return __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win);
}
+/* MPTCP exposes window space from the mptcp-level receive queue, so it tracks
+ * a separate backlog counter from the subflow backlog embedded in struct sock.
+ */
+static inline int mptcp_rwnd_avail(const struct sock *sk)
+{
+ return READ_ONCE(sk->sk_rcvbuf) -
+ READ_ONCE(mptcp_sk(sk)->backlog_len) -
+ tcp_rmem_used(sk);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
- return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
- READ_ONCE(mptcp_sk(sk)->backlog_len) -
- sk_rmem_alloc_get(sk));
+ return mptcp_win_from_space(sk, mptcp_rwnd_avail(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
--
2.34.1