Leverage the hybrid helper to implement the OoO queue prune at
ingress time.
If the msk is owned by user-space when the skb arrives, defer the
pruning to the release_cb. The prune check is additionally performed
when the skb reaches the msk-level queues.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
Notes:
- Similarly to patch 'mptcp: move checks vs rcvbuf size earlier in the RX
path', some cleanup/tuning in mptcp_over_limit() will be needed
- Pruning in the release_cb() is likely not needed, should probably be
removed (after more testing).
---
net/mptcp/mib.c | 3 +++
net/mptcp/mib.h | 3 +++
net/mptcp/options.c | 22 +++++++++++++---
net/mptcp/protocol.c | 61 ++++++++++++++++++++++++++++++++++++++++++++
net/mptcp/protocol.h | 2 ++
5 files changed, 87 insertions(+), 4 deletions(-)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index f23fda0c55a7..5128feec942c 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -85,6 +85,9 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK),
SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
SNMP_MIB_ITEM("WinProbe", MPTCP_MIB_WINPROBE),
+ SNMP_MIB_ITEM("OfoPruned", MPTCP_MIB_OFO_PRUNED),
+ SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
+ SNMP_MIB_ITEM("RcvCollapsed", MPTCP_MIB_RCVCOLLAPSED),
};
/* mptcp_mib_alloc - allocate percpu mib counters
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 812218b5ed2b..2f8f68e33ac5 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -88,6 +88,9 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */
MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */
MPTCP_MIB_WINPROBE, /* MPTCP-level zero window probe */
+ MPTCP_MIB_OFO_PRUNED, /* MPTCP-level OoO queue pruned */
+ MPTCP_MIB_RCVPRUNED, /* Dropped due to memory constraints */
+ MPTCP_MIB_RCVCOLLAPSED, /* Collapsed due to memory pressure */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index a6d290427611..a6a6da262413 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1158,15 +1158,29 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
return hmac == mp_opt->ahmac;
}
-static bool mptcp_over_limit(const struct sock *sk, struct sk_buff *skb)
+static bool mptcp_over_limit(struct sock *sk, struct sk_buff *skb, u32 seq)
{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ bool ret = true;
int limit;
if (!skb->len)
return false;
+ /* Allow some slack for backlog processing */
limit = READ_ONCE(sk->sk_rcvbuf) << 1;
- return sk_rmem_alloc_get(sk) > limit;
+ if (sk_rmem_alloc_get(sk) < limit)
+ return false;
+
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_check_prune(sk, seq);
+ ret = sk_rmem_alloc_get(sk) > READ_ONCE(sk->sk_rcvbuf);
+ } else {
+ __set_bit(MPTCP_PRUNE, &msk->cb_flags);
+ }
+ mptcp_data_unlock(sk);
+ return ret;
}
/* Return false when the caller must to drop the packet, i.e. in case of error,
@@ -1197,7 +1211,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
__mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn);
- if (mptcp_over_limit(subflow->conn, skb))
+ if (mptcp_over_limit(subflow->conn, skb, msk->ack_seq))
return false;
return true;
}
@@ -1277,7 +1291,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
- if (mptcp_over_limit(subflow->conn, skb))
+ if (mptcp_over_limit(subflow->conn, skb, mp_opt.use_map ? mp_opt.data_seq : msk->ack_seq))
return false;
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 800aa7d9408e..9cf135e04d69 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -374,6 +374,59 @@ static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset,
skb_dst_drop(skb);
}
+/* Prune the msk-level OoO queue from its tail; inspired by the TCP version. */
+static void mptcp_prune_ofo_queue(struct sock *sk, u32 seq)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct rb_node *node, *prev;
+	bool pruned = false;
+
+	if (RB_EMPTY_ROOT(&msk->out_of_order_queue))
+		return;
+
+	node = &msk->ooo_last_skb->rbnode;
+
+	do {
+		struct sk_buff *skb = rb_to_skb(node);
+
+		/* If incoming skb would land last in ofo queue, stop pruning. */
+		if (after(seq, MPTCP_SKB_CB(skb)->map_seq))
+			break;
+
+		pruned = true;
+		prev = rb_prev(node);
+		rb_erase(node, &msk->out_of_order_queue);
+		mptcp_drop(sk, skb);
+		msk->ooo_last_skb = rb_to_skb(prev);
+		if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf)
+			break;
+
+		node = prev;
+	} while (node);
+	/* Count one MPTCP-level OfoPruned event per pass, not the TCP counter. */
+	if (pruned)
+		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_OFO_PRUNED);
+}
+
+bool __mptcp_check_prune(struct sock *sk, u32 seq)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ unsigned int dropped;
+ /* Nothing to reclaim while sk_rmem_alloc is below sk_rcvbuf. */
+ if (likely(atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf))
+ return false;
+ /* Collapse the OoO queue first; a non-zero result feeds RcvCollapsed. */
+ dropped = xtcp_collapse_ofo_queue(sk, &msk->out_of_order_queue,
+ &msk->ooo_last_skb, msk->scaling_ratio);
+ if (dropped)
+ MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RCVCOLLAPSED, dropped);
+ if (likely(atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf))
+ return false;
+ /* Still at/over rcvbuf: prune OoO skbs; return true if still not below. */
+ mptcp_prune_ofo_queue(sk, seq);
+ return atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf;
+}
+
static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb)
{
u32 copy_len = MPTCP_SKB_CB(skb)->end_seq - MPTCP_SKB_CB(skb)->map_seq;
@@ -383,6 +436,12 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb)
mptcp_borrow_fwdmem(sk, skb);
+ if (__mptcp_check_prune(sk, MPTCP_SKB_CB(skb)->map_seq)) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
+ mptcp_drop(sk, skb);
+ return false;
+ }
+
if (MPTCP_SKB_CB(skb)->map_seq == ack_seq) {
/* in sequence */
msk->bytes_received += copy_len;
@@ -3693,6 +3752,8 @@ static void mptcp_release_cb(struct sock *sk)
__mptcp_error_report(sk);
if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags))
__mptcp_sync_sndbuf(sk);
+ if (__test_and_clear_bit(MPTCP_PRUNE, &msk->cb_flags))
+ __mptcp_check_prune(sk, msk->ack_seq - 1);
}
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ad906737ee9f..e4bc77de725e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -124,6 +124,7 @@
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_SYNC_STATE 6
#define MPTCP_SYNC_SNDBUF 7
+#define MPTCP_PRUNE 8
struct mptcp_skb_cb {
u32 map_seq;
@@ -828,6 +829,7 @@ bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_state(struct sock *sk, int state);
+bool __mptcp_check_prune(struct sock *sk, u32 seq);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
--
2.53.0
© 2016 - 2026 Red Hat, Inc.