net/mptcp/pm_userspace.c | 24 +++++++++++++++--------- net/mptcp/protocol.h | 2 ++ 2 files changed, 17 insertions(+), 9 deletions(-)
From: Geliang Tang <tanggeliang@kylinos.cn>
In mptcp_pm_nl_remove_doit(), sk_omem_alloc is decremented immediately
but the memory is freed later via kfree_rcu(). This allows a CAP_NET_ADMIN
user to bypass the socket memory quota and exhaust kernel memory by
accumulating RCU callbacks.
Fix by using call_rcu() with a custom callback that uses sock_kfree_s()
to free the entry and decrement sk_omem_alloc atomically. To ensure the
socket remains valid until the callback runs, take a reference with
sock_hold() when storing the socket pointer in the entry, and release it
with sock_put() in the callback.
Convert the synchronous freeing paths in free_local_addr_list() and
delete_local_addr() to use the same RCU callback, ensuring the socket
reference is properly released.
Fixes: 13b4ece33cf9 ("mptcp: pm: Defer freeing of MPTCP userspace path manager entries")
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
v2:
- call mptcp_userspace_pm_free_entry in free_local_addr_list and
delete_local_addr.
v1:
- Link: https://patchwork.kernel.org/project/mptcp/patch/9b443bafa57f40a51eb6a43f088ff37d71b39973.1782528088.git.tanggeliang@kylinos.cn/
This patch addresses the pre-existing issue Sashiko mentioned in
https://sashiko.dev/#/patchset/cover.1782457962.git.tanggeliang@kylinos.cn.
---
net/mptcp/pm_userspace.c | 24 +++++++++++++++---------
net/mptcp/protocol.h | 2 ++
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index ad6ba658e5a5..60b6c06b91aa 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -12,10 +12,19 @@
list_for_each_entry(__entry, \
&((__msk)->pm.userspace_pm_local_addr_list), list)
+static void mptcp_userspace_pm_free_entry(struct rcu_head *head)
+{
+ struct mptcp_pm_addr_entry *entry =
+ container_of(head, struct mptcp_pm_addr_entry, rcu);
+ struct sock *sk = entry->sk;
+
+ sock_kfree_s(sk, entry, sizeof(*entry));
+ sock_put(sk);
+}
+
void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *entry, *tmp;
- struct sock *sk = (struct sock *)msk;
LIST_HEAD(free_list);
spin_lock_bh(&msk->pm.lock);
@@ -23,7 +32,7 @@ void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
list_for_each_entry_safe(entry, tmp, &free_list, list) {
- sock_kfree_s(sk, entry, sizeof(*entry));
+ call_rcu(&entry->rcu, mptcp_userspace_pm_free_entry);
}
}
@@ -73,6 +82,8 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
ret = -ENOMEM;
goto append_err;
}
+ sock_hold(sk);
+ e->sk = sk;
if (!e->addr.id && needs_id)
e->addr.id = find_next_zero_bit(id_bitmap,
@@ -98,7 +109,6 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
struct mptcp_pm_addr_entry *addr)
{
- struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
entry = mptcp_userspace_pm_lookup_addr(msk, &addr->addr);
@@ -109,7 +119,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
* be used multiple times (e.g. fullmesh mode).
*/
list_del_rcu(&entry->list);
- sock_kfree_s(sk, entry, sizeof(*entry));
+ call_rcu(&entry->rcu, mptcp_userspace_pm_free_entry);
msk->pm.local_addr_used--;
return 0;
}
@@ -337,11 +347,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
release_sock(sk);
- kfree_rcu_mightsleep(match);
- /* Adjust sk_omem_alloc like sock_kfree_s() does, to match
- * with allocation of this memory by sock_kmemdup()
- */
- atomic_sub(sizeof(*match), &sk->sk_omem_alloc);
+ call_rcu(&match->rcu, mptcp_userspace_pm_free_entry);
err = 0;
out:
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index da40c6f3705f..250736eae0be 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -257,6 +257,8 @@ struct mptcp_pm_addr_entry {
u32 flags;
int ifindex;
struct socket *lsk;
+ struct sock *sk;
+ struct rcu_head rcu;
};
struct mptcp_data_frag {
--
2.53.0
Hi Geliang,
Thank you for your modifications, that's great!
Our CI did some validations and here is its report:
- KVM Validation: normal (except selftest_mptcp_join): Success! ✅
- KVM Validation: normal (only selftest_mptcp_join): Success! ✅
- KVM Validation: debug (except selftest_mptcp_join): Success! ✅
- KVM Validation: debug (only selftest_mptcp_join): Unstable: 1 failed test(s): selftest_mptcp_join ⚠️
- KVM Validation: btf-normal (only bpftest_all): Success! ✅
- KVM Validation: btf-debug (only bpftest_all): Success! ✅
- Task: https://github.com/multipath-tcp/mptcp_net-next/actions/runs/28424924302
Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/a56918fdc117
Patchwork: https://patchwork.kernel.org/project/mptcp/list/?series=1118650
If there are some issues, you can reproduce them using the same environment as
the one used by the CI thanks to a docker image, e.g.:
$ cd [kernel source code]
$ docker run -v "${PWD}:${PWD}:rw" -w "${PWD}" --privileged --rm -it \
--pull always mptcp/mptcp-upstream-virtme-docker:latest \
auto-normal
For more details:
https://github.com/multipath-tcp/mptcp-upstream-virtme-docker
Please note that despite all the efforts that have been already done to have a
stable tests suite when executed on a public CI like here, it is possible some
reported issues are not due to your modifications. Still, do not hesitate to
help us improve that ;-)
Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (NGI0 Core)
© 2016 - 2026 Red Hat, Inc.