In some edge scenarios, an MPTCP subflows can use a local address
mapped by a "dummy" endpoint created by the in kernel path manager.
When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP
suboption. That is somewhat unexpected, as an MPTCP listener will keep
accepting incoming subflows targeting such address and the unexpected
options can confuse some self-tests.
Be more conservative about RM_ADDR generation: do it only if the
relevant endpoint has either the SIGNAL or SUBFLOW flag - it's not a
dummy one.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
include/uapi/linux/mptcp.h | 1 +
net/mptcp/pm_netlink.c | 25 +++++++++++++++++--------
2 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index f106a3941cdf..34ca8c04f64e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -81,6 +81,7 @@ enum {
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3)
+#define MPTCP_PM_ADDR_FLAG_DUMMY (1 << 4)
enum {
MPTCP_PM_CMD_UNSPEC,
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 56f5603c10f2..928ebe4949e9 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1036,7 +1036,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
entry->addr.id = 0;
entry->addr.port = 0;
entry->ifindex = 0;
- entry->flags = 0;
+ entry->flags = MPTCP_PM_ADDR_FLAG_DUMMY;
entry->lsk = NULL;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
@@ -1238,6 +1238,11 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_DUMMY) {
+ GENL_SET_ERR_MSG(info, "can't create DUMMY endpoint");
+ return -EINVAL;
+ }
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
GENL_SET_ERR_MSG(info, "can't allocate addr");
@@ -1322,11 +1327,12 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
}
static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
- struct mptcp_addr_info *addr)
+ const struct mptcp_pm_addr_entry *entry)
{
- struct mptcp_sock *msk;
- long s_slot = 0, s_num = 0;
+ const struct mptcp_addr_info *addr = &entry->addr;
struct mptcp_rm_list list = { .nr = 0 };
+ long s_slot = 0, s_num = 0;
+ struct mptcp_sock *msk;
pr_debug("remove_id=%d", addr->id);
@@ -1346,7 +1352,8 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
lock_sock(sk);
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
- mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
+ mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
+ !(entry->flags & MPTCP_PM_ADDR_FLAG_DUMMY));
if (remove_subflow)
mptcp_pm_remove_subflow(msk, &list);
release_sock(sk);
@@ -1443,7 +1450,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
__clear_bit(entry->addr.id, pernet->id_bitmap);
spin_unlock_bh(&pernet->lock);
- mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
+ mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
synchronize_rcu();
__mptcp_pm_release_addr_entry(entry);
@@ -1458,9 +1465,11 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
list_for_each_entry(entry, rm_list, list) {
if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
- alist.nr < MPTCP_RM_IDS_MAX &&
slist.nr < MPTCP_RM_IDS_MAX) {
- alist.ids[alist.nr++] = entry->addr.id;
+ /* skip RM_ADDR for dummy endpoints */
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_DUMMY) &&
+ alist.nr < MPTCP_RM_IDS_MAX)
+ alist.ids[alist.nr++] = entry->addr.id;
slist.ids[slist.nr++] = entry->addr.id;
} else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
alist.nr < MPTCP_RM_IDS_MAX) {
--
2.34.1
On Thu, 10 Feb 2022, Paolo Abeni wrote:
> In some edge scenarios, an MPTCP subflows can use a local address
> mapped by a "dummy" endpoint created by the in kernel path manager.
>
> When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP
> suboption. That is somewhat unexpected, as an MPTCP listener will keep
> accepting incoming subflows targeting such address and the unexpected
> options can confuse some self-tests.
>
> Be more conservative about RM_ADDR generation: do it only if the
> relevant endpoint has either the SIGNAL or SUBFLOW flag - it's not a
> dummy one.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> include/uapi/linux/mptcp.h | 1 +
> net/mptcp/pm_netlink.c | 25 +++++++++++++++++--------
> 2 files changed, 18 insertions(+), 8 deletions(-)
>
> diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
> index f106a3941cdf..34ca8c04f64e 100644
> --- a/include/uapi/linux/mptcp.h
> +++ b/include/uapi/linux/mptcp.h
> @@ -81,6 +81,7 @@ enum {
> #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
> #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
> #define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3)
> +#define MPTCP_PM_ADDR_FLAG_DUMMY (1 << 4)
Since this is a public API, "DUMMY" might be a confusing / ambiguous name.
MPTCP_PM_ADDR_FLAG_IMPLICIT_ENDPOINT or MPTCP_PM_ADDR_FLAG_UNADVERTISED
maybe? (open to other ideas of course)
It looks like these dummy/implicit records stay around until a flush
happens. What if there's a request to advertise an address that has had a
dummy created already? mptcp_pm_nl_append_new_local_addr() would consider
that a duplicate and reject it, but replacing the dummy record with a real
one would be better.
-Mat
>
> enum {
> MPTCP_PM_CMD_UNSPEC,
> diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
> index 56f5603c10f2..928ebe4949e9 100644
> --- a/net/mptcp/pm_netlink.c
> +++ b/net/mptcp/pm_netlink.c
> @@ -1036,7 +1036,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
> entry->addr.id = 0;
> entry->addr.port = 0;
> entry->ifindex = 0;
> - entry->flags = 0;
> + entry->flags = MPTCP_PM_ADDR_FLAG_DUMMY;
> entry->lsk = NULL;
> ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
> if (ret < 0)
> @@ -1238,6 +1238,11 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
> return -EINVAL;
> }
>
> + if (addr.flags & MPTCP_PM_ADDR_FLAG_DUMMY) {
> + GENL_SET_ERR_MSG(info, "can't create DUMMY endpoint");
> + return -EINVAL;
> + }
> +
> entry = kmalloc(sizeof(*entry), GFP_KERNEL);
> if (!entry) {
> GENL_SET_ERR_MSG(info, "can't allocate addr");
> @@ -1322,11 +1327,12 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
> }
>
> static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
> - struct mptcp_addr_info *addr)
> + const struct mptcp_pm_addr_entry *entry)
> {
> - struct mptcp_sock *msk;
> - long s_slot = 0, s_num = 0;
> + const struct mptcp_addr_info *addr = &entry->addr;
> struct mptcp_rm_list list = { .nr = 0 };
> + long s_slot = 0, s_num = 0;
> + struct mptcp_sock *msk;
>
> pr_debug("remove_id=%d", addr->id);
>
> @@ -1346,7 +1352,8 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
>
> lock_sock(sk);
> remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
> - mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
> + mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
> + !(entry->flags & MPTCP_PM_ADDR_FLAG_DUMMY));
> if (remove_subflow)
> mptcp_pm_remove_subflow(msk, &list);
> release_sock(sk);
> @@ -1443,7 +1450,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
> __clear_bit(entry->addr.id, pernet->id_bitmap);
> spin_unlock_bh(&pernet->lock);
>
> - mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
> + mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
> synchronize_rcu();
> __mptcp_pm_release_addr_entry(entry);
>
> @@ -1458,9 +1465,11 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
>
> list_for_each_entry(entry, rm_list, list) {
> if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
> - alist.nr < MPTCP_RM_IDS_MAX &&
> slist.nr < MPTCP_RM_IDS_MAX) {
> - alist.ids[alist.nr++] = entry->addr.id;
> + /* skip RM_ADDR for dummy endpoints */
> + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_DUMMY) &&
> + alist.nr < MPTCP_RM_IDS_MAX)
> + alist.ids[alist.nr++] = entry->addr.id;
> slist.ids[slist.nr++] = entry->addr.id;
> } else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
> alist.nr < MPTCP_RM_IDS_MAX) {
> --
> 2.34.1
>
>
>
--
Mat Martineau
Intel
On Fri, 2022-02-11 at 15:10 -0800, Mat Martineau wrote:
> On Thu, 10 Feb 2022, Paolo Abeni wrote:
>
> > In some edge scenarios, an MPTCP subflows can use a local address
> > mapped by a "dummy" endpoint created by the in kernel path manager.
> >
> > When such endpoint is deleted, the in kernel PM sends a RM_ADDR MPTCP
> > suboption. That is somewhat unexpected, as an MPTCP listener will keep
> > accepting incoming subflows targeting such address and the unexpected
> > options can confuse some self-tests.
> >
> > Be more conservative about RM_ADDR generation: do it only if the
> > relevant endpoint has either the SIGNAL or SUBFLOW flag - it's not a
> > dummy one.
> >
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > include/uapi/linux/mptcp.h | 1 +
> > net/mptcp/pm_netlink.c | 25 +++++++++++++++++--------
> > 2 files changed, 18 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
> > index f106a3941cdf..34ca8c04f64e 100644
> > --- a/include/uapi/linux/mptcp.h
> > +++ b/include/uapi/linux/mptcp.h
> > @@ -81,6 +81,7 @@ enum {
> > #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
> > #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
> > #define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3)
> > +#define MPTCP_PM_ADDR_FLAG_DUMMY (1 << 4)
>
> Since this is a public API, "DUMMY" might be a confusing / ambiguous name.
> MPTCP_PM_ADDR_FLAG_IMPLICIT_ENDPOINT or MPTCP_PM_ADDR_FLAG_UNADVERTISED
> maybe? (open to other ideas of course)
I think "IMPLICIT" is the better option, as it's both unadvertised, not
used for subflow.
>
> It looks like these dummy/implicit records stay around until a flush
> happens. What if there's a request to advertise an address that has had a
> dummy created already? mptcp_pm_nl_append_new_local_addr() would consider
> that a duplicate and reject it, but replacing the dummy record with a real
> one would be better.
Agreed. I'll do that in the next iteration.
/P
© 2016 - 2026 Red Hat, Inc.