In some edge scenarios, an MPTCP subflow can use a local address
mapped by an "implicit" endpoint created by the in-kernel path manager.
When such an endpoint is deleted, the in-kernel PM sends a RM_ADDR MPTCP
suboption. That is somewhat unexpected, as an MPTCP listener will keep
accepting incoming subflows targeting such an address and the unexpected
options can confuse some self-tests.
Be more conservative about RM_ADDR generation: explicitly track
implicit endpoints with an appropriate flag and exclude them from
RM_ADDR generation.
Additionally allow user-space to replace an implicit endpoint with
user-provided data at endpoint creation time.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
include/uapi/linux/mptcp.h | 1 +
net/mptcp/pm_netlink.c | 42 ++++++++++++++++++++++++++++----------
2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index f106a3941cdf..9690efedb5fa 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -81,6 +81,7 @@ enum {
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3)
+#define MPTCP_PM_ADDR_FLAG_IMPLICIT (1 << 4)
enum {
MPTCP_PM_CMD_UNSPEC,
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 56f5603c10f2..66cda3a425c4 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -901,8 +901,19 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
list_for_each_entry(cur, &pernet->local_addr_list, list) {
if (addresses_equal(&cur->addr, &entry->addr,
address_use_port(entry) &&
- address_use_port(cur)))
- goto out;
+ address_use_port(cur))) {
+ /* allow replacing the existing endpoint only if such
+ * endpoint is an implicit one and the user-space
+ * did not provide an endpoint id
+ */
+ if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT))
+ goto out;
+ if (entry->addr.id)
+ goto out;
+
+ pernet->addrs--;
+ list_del_rcu(&entry->list);
+ }
}
if (!entry->addr.id) {
@@ -1036,7 +1047,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
entry->addr.id = 0;
entry->addr.port = 0;
entry->ifindex = 0;
- entry->flags = 0;
+ entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
@@ -1238,6 +1249,11 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
+ GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint");
+ return -EINVAL;
+ }
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
GENL_SET_ERR_MSG(info, "can't allocate addr");
@@ -1322,11 +1338,12 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
}
static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
- struct mptcp_addr_info *addr)
+ const struct mptcp_pm_addr_entry *entry)
{
- struct mptcp_sock *msk;
- long s_slot = 0, s_num = 0;
+ const struct mptcp_addr_info *addr = &entry->addr;
struct mptcp_rm_list list = { .nr = 0 };
+ long s_slot = 0, s_num = 0;
+ struct mptcp_sock *msk;
pr_debug("remove_id=%d", addr->id);
@@ -1346,7 +1363,8 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
lock_sock(sk);
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
- mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
+ mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
+ !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
if (remove_subflow)
mptcp_pm_remove_subflow(msk, &list);
release_sock(sk);
@@ -1443,7 +1461,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
__clear_bit(entry->addr.id, pernet->id_bitmap);
spin_unlock_bh(&pernet->lock);
- mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
+ mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
synchronize_rcu();
__mptcp_pm_release_addr_entry(entry);
@@ -1458,9 +1476,11 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
list_for_each_entry(entry, rm_list, list) {
if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
- alist.nr < MPTCP_RM_IDS_MAX &&
slist.nr < MPTCP_RM_IDS_MAX) {
- alist.ids[alist.nr++] = entry->addr.id;
+ /* skip RM_ADDR for dummy endpoints */
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) &&
+ alist.nr < MPTCP_RM_IDS_MAX)
+ alist.ids[alist.nr++] = entry->addr.id;
slist.ids[slist.nr++] = entry->addr.id;
} else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
alist.nr < MPTCP_RM_IDS_MAX) {
@@ -1811,7 +1831,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&pernet->lock);
entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
- if (!entry) {
+ if (!entry || (entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) {
spin_unlock_bh(&pernet->lock);
return -EINVAL;
}
--
2.34.1
On Mon, 2022-02-14 at 16:38 +0100, Paolo Abeni wrote: > In some edge scenarios, an MPTCP subflows can use a local address > mapped by a "implicit" endpoint created by the in-kernel path manager. > > When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP > suboption. That is somewhat unexpected, as an MPTCP listener will keep > accepting incoming subflows targeting such address and the unexpected > options can confuse some self-tests. > > Be more conservative about RM_ADDR generation: explicitly track the > implicit endpoint with an appropriate flag and exclude them from the > RM_ADDR generation. > > Additionally allow the user-space to replace implicit endpoint with > user-provided data at endpoint creation time. > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> The CI still reports an issue on top of this patch: https://cirrus-ci.com/task/5380046008352768?logs=test#L7283 Even if the symtom is the same, the root cause is different. I think such failure is caused by the flush operation deleting both all existing endpoints and all the existing subflows for each MPTCP socket. Even when the subflow is not attached to any known, non implicit, endpoint. When we have simult flush on both sides, deleting the subflow (with no endpoint) on one end can make disappear the subflow on the other end (tied to a local endpoint) before the PM could generate the relevant RM_ADDR. Overall the number of generated RM_ADDR become impredictable, even if the testcase is quite stable, to the point I could not replicate the failure reported by the CI. AFAICS, there are 2 possible solutions: - avoid flushing both ends in test-cases "flush subflows" and "flush addresses". This is very simple, but could hide other (currently unknown) problems. - change mptcp_nl_cmd_flush_addrs() to only delete subflows tied to known, non implicit, endpoint. 
That is possibly a saner behavior for ip mptcp endpoint flush, but will need more testcases to be adjusted and is still a change of behaviour. Any hint on the preferred option more than welcome! /P
On Mon, 14 Feb 2022, Paolo Abeni wrote: > On Mon, 2022-02-14 at 16:38 +0100, Paolo Abeni wrote: >> In some edge scenarios, an MPTCP subflows can use a local address >> mapped by a "implicit" endpoint created by the in-kernel path manager. >> >> When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP >> suboption. That is somewhat unexpected, as an MPTCP listener will keep >> accepting incoming subflows targeting such address and the unexpected >> options can confuse some self-tests. >> >> Be more conservative about RM_ADDR generation: explicitly track the >> implicit endpoint with an appropriate flag and exclude them from the >> RM_ADDR generation. >> >> Additionally allow the user-space to replace implicit endpoint with >> user-provided data at endpoint creation time. >> >> Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > The CI still reports an issue on top of this patch: > > https://cirrus-ci.com/task/5380046008352768?logs=test#L7283 > > Even if the symtom is the same, the root cause is different. I think > such failure is caused by the flush operation deleting both all > existing endpoints and all the existing subflows for each MPTCP socket. > > Even when the subflow is not attached to any known, non implicit, > endpoint. > > When we have simult flush on both sides, deleting the subflow (with no > endpoint) on one end can make disappear the subflow on the other end > (tied to a local endpoint) before the PM could generate the relevant > RM_ADDR. > If we aren't closing all subflows during the flush, shouldn't there be at least one subflow where the RM_ADDR can be sent? Maybe mptcp_pm_nl_addr_send_ack() needs to pick a better subflow for sending the RM_ADDR. Do you think it would help to make mptcp_subflow_send_ack() return a bool, and only return true if the ack was sent? Then it could retry the ack on other subflows in the conn_list, until it finds one that works or they all fail. 
> Overall the number of generated RM_ADDR become impredictable, even if > the testcase is quite stable, to the point I could not replicate the > failure reported by the CI. > > AFAICS, there are 2 possible solutions: > > - avoid flushing both ends in test-cases "flush subflows" and "flush > addresses". This is very simple, but could hide other (currently > unknown) problems. > - change mptcp_nl_cmd_flush_addrs() to only delete subflows tied to > known, non implicit, endpoint. That is possibly a saner behavior for ip > mptcp endpoint flush, but will need more testcases to be adjusted and > is still a change of behaviour. > > Any hint on the preferred option more than welcome! -- Mat Martineau Intel
On Mon, 2022-02-14 at 17:19 -0800, Mat Martineau wrote: > On Mon, 14 Feb 2022, Paolo Abeni wrote: > > > On Mon, 2022-02-14 at 16:38 +0100, Paolo Abeni wrote: > > > In some edge scenarios, an MPTCP subflows can use a local address > > > mapped by a "implicit" endpoint created by the in-kernel path manager. > > > > > > When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP > > > suboption. That is somewhat unexpected, as an MPTCP listener will keep > > > accepting incoming subflows targeting such address and the unexpected > > > options can confuse some self-tests. > > > > > > Be more conservative about RM_ADDR generation: explicitly track the > > > implicit endpoint with an appropriate flag and exclude them from the > > > RM_ADDR generation. > > > > > > Additionally allow the user-space to replace implicit endpoint with > > > user-provided data at endpoint creation time. > > > > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > > > The CI still reports an issue on top of this patch: > > > > https://cirrus-ci.com/task/5380046008352768?logs=test#L7283 > > > > Even if the symtom is the same, the root cause is different. I think > > such failure is caused by the flush operation deleting both all > > existing endpoints and all the existing subflows for each MPTCP socket. > > > > Even when the subflow is not attached to any known, non implicit, > > endpoint. > > > > When we have simult flush on both sides, deleting the subflow (with no > > endpoint) on one end can make disappear the subflow on the other end > > (tied to a local endpoint) before the PM could generate the relevant > > RM_ADDR. > > > > If we aren't closing all subflows during the flush, shouldn't there be at > least one subflow where the RM_ADDR can be sent? > > Maybe mptcp_pm_nl_addr_send_ack() needs to pick a better subflow for > sending the RM_ADDR. Do you think it would help to make > mptcp_subflow_send_ack() return a bool, and only return true if the ack > was sent? 
Then it could retry the ack on other subflows in the conn_list, > until it finds one that works or they all fail. I'm sorry it looks like I was unclear. What I mean is that in the following scenario: Client Server [endpoint1] ------- MPTCP-subflow ----> [addr0/no endpoint] [endpoint2] ------- subflow 1 -------> [addr0/no endpoint] [endpoint3] ------- subflow 2 -------> [addr0/no endpoint] if we flush simultaneusly the endpoints on both the server and the client (which is what the failing selftest is currently doing), the number of RM_ADDR generated by the client is unpredictable. On endpoint flush the server will try to delete all the subflows, regardless of no endpoints attached there. The server can delete the subflow 1 and/or the subflow 2 before the client processes the relevant endpoint on the other side. If that happens, the client will not generate (correctly) the related RM_ADDR. The testcase is currently expectiong exactly 2 RM_ADDR in the above scenario (well actually 3, because the testcase uses 3 MPJ subflows) Picking a different subflow to send the RM_ADDR will not change the results. Not sure if the above is somewhat more clear. I don't see other viable options other then the 2 mentioned in my previous email. Thanks, Paolo
On Tue, 15 Feb 2022, Paolo Abeni wrote:
> On Mon, 2022-02-14 at 17:19 -0800, Mat Martineau wrote:
>> On Mon, 14 Feb 2022, Paolo Abeni wrote:
>>
>>> On Mon, 2022-02-14 at 16:38 +0100, Paolo Abeni wrote:
>>>> In some edge scenarios, an MPTCP subflows can use a local address
>>>> mapped by a "implicit" endpoint created by the in-kernel path manager.
>>>>
>>>> When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP
>>>> suboption. That is somewhat unexpected, as an MPTCP listener will keep
>>>> accepting incoming subflows targeting such address and the unexpected
>>>> options can confuse some self-tests.
>>>>
>>>> Be more conservative about RM_ADDR generation: explicitly track the
>>>> implicit endpoint with an appropriate flag and exclude them from the
>>>> RM_ADDR generation.
>>>>
>>>> Additionally allow the user-space to replace implicit endpoint with
>>>> user-provided data at endpoint creation time.
>>>>
>>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>>>
>>> The CI still reports an issue on top of this patch:
>>>
>>> https://cirrus-ci.com/task/5380046008352768?logs=test#L7283
>>>
>>> Even if the symtom is the same, the root cause is different. I think
>>> such failure is caused by the flush operation deleting both all
>>> existing endpoints and all the existing subflows for each MPTCP socket.
>>>
>>> Even when the subflow is not attached to any known, non implicit,
>>> endpoint.
>>>
>>> When we have simult flush on both sides, deleting the subflow (with no
>>> endpoint) on one end can make disappear the subflow on the other end
>>> (tied to a local endpoint) before the PM could generate the relevant
>>> RM_ADDR.
>>>
>>
>> If we aren't closing all subflows during the flush, shouldn't there be at
>> least one subflow where the RM_ADDR can be sent?
>>
>> Maybe mptcp_pm_nl_addr_send_ack() needs to pick a better subflow for
>> sending the RM_ADDR. Do you think it would help to make
>> mptcp_subflow_send_ack() return a bool, and only return true if the ack
>> was sent? Then it could retry the ack on other subflows in the conn_list,
>> until it finds one that works or they all fail.
>
> I'm sorry it looks like I was unclear.
>
> What I mean is that in the following scenario:
>
> Client Server
> [endpoint1] ------- MPTCP-subflow ----> [addr0/no endpoint]
> [endpoint2] ------- subflow 1 -------> [addr0/no endpoint]
> [endpoint3] ------- subflow 2 -------> [addr0/no endpoint]
>
> if we flush simultaneusly the endpoints on both the server and the
> client (which is what the failing selftest is currently doing), the
> number of RM_ADDR generated by the client is unpredictable.
>
> On endpoint flush the server will try to delete all the subflows,
> regardless of no endpoints attached there. The server can delete the
> subflow 1 and/or the subflow 2 before the client processes the relevant
> endpoint on the other side. If that happens, the client will not
> generate (correctly) the related RM_ADDR.
I'm not sure that's correct, though. Even if subflows 1 and/or 2 have been
closed, if the client has sent an ADD_ADDR to advertise endpoints 2 and 3,
it should send RM_ADDR to "unadvertise" them on the remaining open
subflow. While RM_ADDR does in some cases trigger disconnects, its primary
role is to inform the peer that previous advertisements have been revoked.
>
> The testcase is currently expectiong exactly 2 RM_ADDR in the above
> scenario (well actually 3, because the testcase uses 3 MPJ subflows)
>
> Picking a different subflow to send the RM_ADDR will not change the
> results.
>
> Not sure if the above is somewhat more clear.
>
I think your explanation is clear, but we are each trying to explain a
different model of how things should work :)
In our in-kernel PM implementation, ADD_ADDR ends up being treated as a
"request to connect" event and RM_ADDR is a "request to disconnect" event.
That doesn't completely capture what the RFC intends: ADD_ADDR is an
advertisement that an endpoint is available for a peer PM to potentially
connect to (at that moment or any later time before RM_ADDR or connection
close). RM_ADDR revokes that advertisement. From the RFC (note middle
sentence, especially):
3.4.2. Remove Address
If, during the lifetime of an MPTCP connection, a previously
announced address becomes invalid (e.g., if the interface disappears
or an IPv6 address is no longer preferred), the affected host SHOULD
announce this situation so that the peer can remove subflows related
to this address. Even if an address is not in use by an MPTCP
connection, if it has been previously announced, an implementation
SHOULD announce its removal. A host MAY also choose to announce that
a valid IP address should not be used any longer -- for example, for
make-before-break session continuity.
Those SHOULDs do say that maybe our self test is more strict than the RFC,
but if we fix our in-kernel PM to send a predictable number of RM_ADDRs
then the existing tests can be satisfied.
--
Mat Martineau
Intel
On Tue, 2022-02-15 at 11:15 -0800, Mat Martineau wrote: > On Tue, 15 Feb 2022, Paolo Abeni wrote: > > > On Mon, 2022-02-14 at 17:19 -0800, Mat Martineau wrote: > > > On Mon, 14 Feb 2022, Paolo Abeni wrote: > > > > > > > On Mon, 2022-02-14 at 16:38 +0100, Paolo Abeni wrote: > > > > > In some edge scenarios, an MPTCP subflows can use a local address > > > > > mapped by a "implicit" endpoint created by the in-kernel path manager. > > > > > > > > > > When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP > > > > > suboption. That is somewhat unexpected, as an MPTCP listener will keep > > > > > accepting incoming subflows targeting such address and the unexpected > > > > > options can confuse some self-tests. > > > > > > > > > > Be more conservative about RM_ADDR generation: explicitly track the > > > > > implicit endpoint with an appropriate flag and exclude them from the > > > > > RM_ADDR generation. > > > > > > > > > > Additionally allow the user-space to replace implicit endpoint with > > > > > user-provided data at endpoint creation time. > > > > > > > > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > > > > > > > The CI still reports an issue on top of this patch: > > > > > > > > https://cirrus-ci.com/task/5380046008352768?logs=test#L7283 > > > > > > > > Even if the symtom is the same, the root cause is different. I think > > > > such failure is caused by the flush operation deleting both all > > > > existing endpoints and all the existing subflows for each MPTCP socket. > > > > > > > > Even when the subflow is not attached to any known, non implicit, > > > > endpoint. > > > > > > > > When we have simult flush on both sides, deleting the subflow (with no > > > > endpoint) on one end can make disappear the subflow on the other end > > > > (tied to a local endpoint) before the PM could generate the relevant > > > > RM_ADDR. 
> > > > > > > > > > If we aren't closing all subflows during the flush, shouldn't there be at > > > least one subflow where the RM_ADDR can be sent? > > > > > > Maybe mptcp_pm_nl_addr_send_ack() needs to pick a better subflow for > > > sending the RM_ADDR. Do you think it would help to make > > > mptcp_subflow_send_ack() return a bool, and only return true if the ack > > > was sent? Then it could retry the ack on other subflows in the conn_list, > > > until it finds one that works or they all fail. > > > > I'm sorry it looks like I was unclear. > > > > What I mean is that in the following scenario: > > > > Client Server > > [endpoint1] ------- MPTCP-subflow ----> [addr0/no endpoint] > > [endpoint2] ------- subflow 1 -------> [addr0/no endpoint] > > [endpoint3] ------- subflow 2 -------> [addr0/no endpoint] > > > > if we flush simultaneusly the endpoints on both the server and the > > client (which is what the failing selftest is currently doing), the > > number of RM_ADDR generated by the client is unpredictable. > > > > On endpoint flush the server will try to delete all the subflows, > > regardless of no endpoints attached there. The server can delete the > > subflow 1 and/or the subflow 2 before the client processes the relevant > > endpoint on the other side. If that happens, the client will not > > generate (correctly) the related RM_ADDR. > > I'm not sure that's correct, though. Even if subflows 1 and/or 2 have been > closed, if the client has sent an ADD_ADDR to advertise endpoints 2 and 3, > it should send RM_ADDR to "unadvertise" them on the remaining open > subflow. If the peer sent ADD_ADDR for a given address, it will generate the RM_ADDR, regardless of the related subflow being already closed. We don't have bugs - al least not shown by the current self-tests failures - in that situation. 
The criticial scenario is a bit different: the client created the subflows, but it did not avertize any address with ADD_ADDR (the client endpoints have the 'SUBFLOW' flag, not the 'SIGNAL' one). If I read correctly the discussion about ADD_ADDR handling, a correct and simple solution would be sending RM_ADDR only for 'SIGNAL' endpoints, and update the test-cases accordingly. It's not clear to me if in case of 'endpoint flush', we should keep deleting all the subflows - including the ones not tied to 'SIGNAL' endpoints on any end. Cheers, P
On Wed, 16 Feb 2022, Paolo Abeni wrote: > On Tue, 2022-02-15 at 11:15 -0800, Mat Martineau wrote: >> On Tue, 15 Feb 2022, Paolo Abeni wrote: >> >>> On Mon, 2022-02-14 at 17:19 -0800, Mat Martineau wrote: >>>> On Mon, 14 Feb 2022, Paolo Abeni wrote: >>>> >>>>> On Mon, 2022-02-14 at 16:38 +0100, Paolo Abeni wrote: >>>>>> In some edge scenarios, an MPTCP subflows can use a local address >>>>>> mapped by a "implicit" endpoint created by the in-kernel path manager. >>>>>> >>>>>> When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP >>>>>> suboption. That is somewhat unexpected, as an MPTCP listener will keep >>>>>> accepting incoming subflows targeting such address and the unexpected >>>>>> options can confuse some self-tests. >>>>>> >>>>>> Be more conservative about RM_ADDR generation: explicitly track the >>>>>> implicit endpoint with an appropriate flag and exclude them from the >>>>>> RM_ADDR generation. >>>>>> >>>>>> Additionally allow the user-space to replace implicit endpoint with >>>>>> user-provided data at endpoint creation time. >>>>>> >>>>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com> >>>>> >>>>> The CI still reports an issue on top of this patch: >>>>> >>>>> https://cirrus-ci.com/task/5380046008352768?logs=test#L7283 >>>>> >>>>> Even if the symtom is the same, the root cause is different. I think >>>>> such failure is caused by the flush operation deleting both all >>>>> existing endpoints and all the existing subflows for each MPTCP socket. >>>>> >>>>> Even when the subflow is not attached to any known, non implicit, >>>>> endpoint. >>>>> >>>>> When we have simult flush on both sides, deleting the subflow (with no >>>>> endpoint) on one end can make disappear the subflow on the other end >>>>> (tied to a local endpoint) before the PM could generate the relevant >>>>> RM_ADDR. >>>>> >>>> >>>> If we aren't closing all subflows during the flush, shouldn't there be at >>>> least one subflow where the RM_ADDR can be sent? 
>>>> >>>> Maybe mptcp_pm_nl_addr_send_ack() needs to pick a better subflow for >>>> sending the RM_ADDR. Do you think it would help to make >>>> mptcp_subflow_send_ack() return a bool, and only return true if the ack >>>> was sent? Then it could retry the ack on other subflows in the conn_list, >>>> until it finds one that works or they all fail. >>> >>> I'm sorry it looks like I was unclear. >>> >>> What I mean is that in the following scenario: >>> >>> Client Server >>> [endpoint1] ------- MPTCP-subflow ----> [addr0/no endpoint] >>> [endpoint2] ------- subflow 1 -------> [addr0/no endpoint] >>> [endpoint3] ------- subflow 2 -------> [addr0/no endpoint] >>> >>> if we flush simultaneusly the endpoints on both the server and the >>> client (which is what the failing selftest is currently doing), the >>> number of RM_ADDR generated by the client is unpredictable. >>> >>> On endpoint flush the server will try to delete all the subflows, >>> regardless of no endpoints attached there. The server can delete the >>> subflow 1 and/or the subflow 2 before the client processes the relevant >>> endpoint on the other side. If that happens, the client will not >>> generate (correctly) the related RM_ADDR. >> >> I'm not sure that's correct, though. Even if subflows 1 and/or 2 have been >> closed, if the client has sent an ADD_ADDR to advertise endpoints 2 and 3, >> it should send RM_ADDR to "unadvertise" them on the remaining open >> subflow. > > If the peer sent ADD_ADDR for a given address, it will generate the > RM_ADDR, regardless of the related subflow being already closed. We > don't have bugs - al least not shown by the current self-tests failures > - in that situation. > > The criticial scenario is a bit different: the client created the > subflows, but it did not avertize any address with ADD_ADDR (the client > endpoints have the 'SUBFLOW' flag, not the 'SIGNAL' one). > Ok, that's the part I had missed. 
> If I read correctly the discussion about ADD_ADDR handling, a correct > and simple solution would be sending RM_ADDR only for 'SIGNAL' > endpoints, and update the test-cases accordingly. > Yeah, I think that would work. > It's not clear to me if in case of 'endpoint flush', we should keep > deleting all the subflows - including the ones not tied to 'SIGNAL' > endpoints on any end. > I like the idea of separating the "unadvertising" from subflow deletion. MPTCP_PM_CMD_FLUSH_ADDRS should continue to act the same as "foreach endpoint ID call MPTCP_PM_CMD_DEL_ADDR". Could add a flag to optionally delete/keep subflows? -- Mat Martineau Intel
© 2016 - 2026 Red Hat, Inc.