RM_ADDR is sent over an active subflow: the first one found in the
subflows list. There is then a high chance the initial subflow is picked.
With the in-kernel PM, when an endpoint is removed, an RM_ADDR is sent,
then the linked subflows are closed. This is done for each active MPTCP
connection.
MPTCP endpoints are likely removed because the attached network is no
longer available or usable. In this case, it is better to avoid sending
this RM_ADDR over the subflow that is going to be removed, and instead
to send it over another active and non-stale subflow, if any.
This modification avoids situations where the other end is not notified
when a subflow is no longer usable: typically when the endpoint linked
to the initial subflow is removed, especially on the server side.
Fixes: 8dd5efb1f91b ("mptcp: send ack for rm_addr")
Reported-by: Frank Lorenz
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/612
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
---
net/mptcp/pm.c | 52 +++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 41 insertions(+), 11 deletions(-)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 8206b0fd2377..ef76fbc40915 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
}
-void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
+static bool subflow_in_rlist(const struct mptcp_subflow_context *subflow,
+ const struct mptcp_rm_list *rlist)
{
- struct mptcp_subflow_context *subflow, *alt = NULL;
+ u8 i, id = subflow_get_local_id(subflow);
+
+ for (i = 0; i < rlist->nr; i++) {
+ if (rlist->ids[i] == id)
+ return true;
+ }
+
+ return false;
+}
+
+static void
+mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rlist)
+{
+ struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
@@ -225,18 +240,33 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
if (__mptcp_subflow_active(subflow)) {
- if (!subflow->stale) {
- mptcp_pm_send_ack(msk, subflow, false, false);
- return;
+ if (unlikely(subflow->stale)) {
+ if (!stale)
+ stale = subflow;
+ } else if (unlikely(rlist &&
+ subflow_in_rlist(subflow, rlist))) {
+ if (!same_id)
+ same_id = subflow;
+ } else {
+ goto send_ack;
}
-
- if (!alt)
- alt = subflow;
}
}
- if (alt)
- mptcp_pm_send_ack(msk, alt, false, false);
+ if (same_id)
+ subflow = same_id;
+ else if (stale)
+ subflow = stale;
+ else
+ return;
+
+send_ack:
+ mptcp_pm_send_ack(msk, subflow, false, false);
+}
+
+void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
+{
+ mptcp_pm_addr_send_ack_avoid_list(msk, NULL);
}
int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -470,7 +500,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
msk->pm.rm_list_tx = *rm_list;
rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
WRITE_ONCE(msk->pm.addr_signal, rm_addr);
- mptcp_pm_addr_send_ack(msk);
+ mptcp_pm_addr_send_ack_avoid_list(msk, rm_list);
return 0;
}
--
2.51.0
On 19/02/2026 20:01, Matthieu Baerts (NGI0) wrote:
> RM_ADDR are sent over an active subflow, the first one in the subflows
> list. There is then a high chance the initial subflow is picked. With
> the in-kernel PM, when an endpoint is removed, a RM_ADDR is sent, then
> linked subflows are closed. This is done for each active MPTCP
> connection.
>
> MPTCP endpoints are likely removed because the attached network is no
> longer available or usable. In this case, it is better to avoid sending
> this RM_ADDR over the subflow that is going to be removed, but prefer
> sending it over another active and non stale subflow, if any.
>
> This modification avoids situations where the other end is not notified
> when a subflow is no longer usable: typically when the endpoint linked
> to the initial subflow is removed, especially on the server side.
>
> Fixes: 8dd5efb1f91b ("mptcp: send ack for rm_addr")
> Reported-by: Frank Lorenz
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/612
> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
> ---
> net/mptcp/pm.c | 52 +++++++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 41 insertions(+), 11 deletions(-)
>
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index 8206b0fd2377..ef76fbc40915 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk,
> spin_lock_bh(&msk->pm.lock);
> }
>
> -void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
> +static bool subflow_in_rlist(const struct mptcp_subflow_context *subflow,
> + const struct mptcp_rm_list *rlist)
> {
> - struct mptcp_subflow_context *subflow, *alt = NULL;
> + u8 i, id = subflow_get_local_id(subflow);
> +
> + for (i = 0; i < rlist->nr; i++) {
> + if (rlist->ids[i] == id)
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static void
> +mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk,
> + const struct mptcp_rm_list *rlist)
> +{
> + struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL;
>
> msk_owned_by_me(msk);
> lockdep_assert_held(&msk->pm.lock);
> @@ -225,18 +240,33 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
>
> mptcp_for_each_subflow(msk, subflow) {
> if (__mptcp_subflow_active(subflow)) {
I should probably remove one level of indentation here by doing:

    if (!__mptcp_subflow_active(subflow))
            continue;

I can send a v2 later, but feel free to review this version already:
this cleanup doesn't change the fix itself.
> - if (!subflow->stale) {
> - mptcp_pm_send_ack(msk, subflow, false, false);
> - return;
> + if (unlikely(subflow->stale)) {
> + if (!stale)
> + stale = subflow;
> + } else if (unlikely(rlist &&
> + subflow_in_rlist(subflow, rlist))) {
> + if (!same_id)
> + same_id = subflow;
> + } else {
> + goto send_ack;
> }
> -
> - if (!alt)
> - alt = subflow;
> }
> }
>
> - if (alt)
> - mptcp_pm_send_ack(msk, alt, false, false);
> + if (same_id)
> + subflow = same_id;
> + else if (stale)
> + subflow = stale;
Note: in my initial version, I only used one alternative for both
"stale" and "same id" subflows. I guess it is better to send over a
"same id" subflow (one linked to an ID being removed) than over a stale
one, hence the priority, but this costs a few more lines of code. To be
discussed.
> + else
> + return;
> +
> +send_ack:
> + mptcp_pm_send_ack(msk, subflow, false, false);
> +}
> +
> +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
> +{
> + mptcp_pm_addr_send_ack_avoid_list(msk, NULL);
> }
>
> int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
> @@ -470,7 +500,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
> msk->pm.rm_list_tx = *rm_list;
> rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
> WRITE_ONCE(msk->pm.addr_signal, rm_addr);
> - mptcp_pm_addr_send_ack(msk);
> + mptcp_pm_addr_send_ack_avoid_list(msk, rm_list);
> return 0;
> }
>
>
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
© 2016 - 2026 Red Hat, Inc.