From: Geliang Tang <tanggeliang@kylinos.cn>
This patch adds .established and .subflow_established interfaces for
struct mptcp_pm_ops, and calls pm->ops->established/subflow_established
from mptcp_pm_worker(). It then gets rid of the corresponding code in
__mptcp_pm_kernel_worker().
Since mptcp_pm_addr_send_ack() is a sleepable kfunc invoked by
mptcp_pm_create_subflow_or_signal_addr(), the .established() and
.subflow_established() interfaces of a BPF PM must be entered through
__bpf_prog_enter_sleepable(), which cannot be called while holding a lock.
This patch therefore releases the pm lock before invoking these interfaces
in mptcp_pm_worker(), and instead takes the lock inside
mptcp_pm_kernel_established() and mptcp_pm_kernel_subflow_established().
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
include/net/mptcp.h | 4 ++++
net/mptcp/pm.c | 32 ++++++++++++++++++++++++--------
net/mptcp/pm_kernel.c | 25 +++++++++++--------------
3 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 9f28ef550e10..4ac936e4ce0d 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -124,6 +124,10 @@ struct mptcp_pm_ops {
bool (*get_priority)(struct mptcp_sock *msk,
struct mptcp_addr_info *skc);
+ /* optional */
+ void (*established)(struct mptcp_sock *msk);
+ void (*subflow_established)(struct mptcp_sock *msk);
+
char name[MPTCP_PM_NAME_MAX];
struct module *owner;
struct list_head list;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index ba7424582ebf..e2b2c874a9f8 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -516,7 +516,8 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
* be sure to serve this event only once.
*/
if (READ_ONCE(pm->work_pending) &&
- !(pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
+ !(pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) &&
+ pm->ops->established)
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
if ((pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
@@ -543,7 +544,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk)
pr_debug("msk=%p\n", msk);
- if (!READ_ONCE(pm->work_pending))
+ if (!READ_ONCE(pm->work_pending) || !pm->ops->subflow_established)
return;
spin_lock_bh(&pm->lock);
@@ -570,7 +571,8 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
return;
}
- if (!READ_ONCE(pm->work_pending) && !update_subflows)
+ if (!pm->ops->subflow_established ||
+ (!READ_ONCE(pm->work_pending) && !update_subflows))
return;
spin_lock_bh(&pm->lock);
@@ -628,7 +630,7 @@ void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
pr_debug("msk=%p\n", msk);
- if (!READ_ONCE(pm->work_pending))
+ if (!READ_ONCE(pm->work_pending) || !pm->ops->subflow_established)
return;
spin_lock_bh(&pm->lock);
@@ -949,20 +951,34 @@ void mptcp_pm_worker(struct mptcp_sock *msk)
if (!(pm->status & MPTCP_PM_WORK_MASK))
return;
- spin_lock_bh(&msk->pm.lock);
-
pr_debug("msk=%p status=%x\n", msk, pm->status);
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
+ spin_lock_bh(&pm->lock);
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
mptcp_pm_addr_send_ack(msk);
+ spin_unlock_bh(&pm->lock);
}
if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
+ spin_lock_bh(&pm->lock);
pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
mptcp_pm_rm_addr_recv(msk);
+ spin_unlock_bh(&pm->lock);
+ }
+ if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
+ spin_lock_bh(&pm->lock);
+ pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
+ spin_unlock_bh(&pm->lock);
+ pm->ops->established(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
+ spin_lock_bh(&pm->lock);
+ pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
+ spin_unlock_bh(&pm->lock);
+ pm->ops->subflow_established(msk);
}
+ spin_lock_bh(&pm->lock);
__mptcp_pm_kernel_worker(msk);
-
- spin_unlock_bh(&msk->pm.lock);
+ spin_unlock_bh(&pm->lock);
}
static void mptcp_pm_ops_init(struct mptcp_sock *msk,
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index 7ec81d5195d4..2e181224bccb 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -269,6 +269,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
local_addr_max = mptcp_pm_get_local_addr_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
+ spin_lock_bh(&msk->pm.lock);
/* do lazy endpoint usage accounting for the MPC subflows */
if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first);
@@ -307,7 +308,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
* current address announce will be completed.
*/
if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
- return;
+ goto out;
if (!select_signal_address(pernet, msk, &local))
goto subflow;
@@ -316,7 +317,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
* continuing, and trying to create subflows.
*/
if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
- return;
+ goto out;
__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
@@ -365,14 +366,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
spin_lock_bh(&msk->pm.lock);
}
mptcp_pm_nl_check_work_pending(msk);
+out:
+ spin_unlock_bh(&msk->pm.lock);
}
-static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
+static void mptcp_pm_kernel_established(struct mptcp_sock *msk)
{
mptcp_pm_create_subflow_or_signal_addr(msk);
}
-static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
+static void mptcp_pm_kernel_subflow_established(struct mptcp_sock *msk)
{
mptcp_pm_create_subflow_or_signal_addr(msk);
}
@@ -758,8 +761,8 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net,
spin_lock_bh(&msk->pm.lock);
if (mptcp_addresses_equal(addr, &mpc_addr, addr->port))
msk->mpc_endpoint_id = addr->id;
- mptcp_pm_create_subflow_or_signal_addr(msk);
spin_unlock_bh(&msk->pm.lock);
+ mptcp_pm_create_subflow_or_signal_addr(msk);
release_sock(sk);
next:
@@ -1243,8 +1246,8 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
mptcp_pm_rm_subflow(msk, &list);
__mark_subflow_endp_available(msk, list.ids[0]);
- mptcp_pm_create_subflow_or_signal_addr(msk);
spin_unlock_bh(&msk->pm.lock);
+ mptcp_pm_create_subflow_or_signal_addr(msk);
}
static void mptcp_pm_nl_set_flags_all(struct net *net,
@@ -1348,14 +1351,6 @@ void __mptcp_pm_kernel_worker(struct mptcp_sock *msk)
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
mptcp_pm_nl_add_addr_received(msk);
}
- if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
- pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
- mptcp_pm_nl_fully_established(msk);
- }
- if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
- pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
- mptcp_pm_nl_subflow_established(msk);
- }
}
static int __net_init pm_nl_init_net(struct net *net)
@@ -1422,6 +1417,8 @@ static void mptcp_pm_kernel_init(struct mptcp_sock *msk)
struct mptcp_pm_ops mptcp_pm_kernel = {
.get_local_id = mptcp_pm_kernel_get_local_id,
.get_priority = mptcp_pm_kernel_get_priority,
+ .established = mptcp_pm_kernel_established,
+ .subflow_established = mptcp_pm_kernel_subflow_established,
.init = mptcp_pm_kernel_init,
.name = "kernel",
.owner = THIS_MODULE,
--
2.43.0
Hi Geliang, On 24/03/2025 09:19, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch adds .established and .subflow_established interfaces for > struct mptcp_pm_ops, and calls pm->ops->established/subflow_established > in from mptcp_pm_worker(). Then get rid of the corresponding code from > __mptcp_pm_kernel_worker(). > > Since mptcp_pm_addr_send_ack() is a sleepable kfunc, which is invoked > by mptcp_pm_create_subflow_or_signal_addr(), .established() and > .subflow_established() interfaces of BPF PM should be invoked by > __bpf_prog_enter_sleepable(), which can't be invoked under a lock. > This patch unlocks the pm lock before invoking this interface in > mptcp_pm_worker(), while holding this lock in mptcp_pm_kernel_established() > and mptcp_pm_kernel_subflow_established(). (...) > @@ -949,20 +951,34 @@ void mptcp_pm_worker(struct mptcp_sock *msk) > if (!(pm->status & MPTCP_PM_WORK_MASK)) > return; > > - spin_lock_bh(&msk->pm.lock); > - > pr_debug("msk=%p status=%x\n", msk, pm->status); > if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { This should probably be read under the lock or with a READ_ONCE. Or manipulating pm->status before, then running the different actions. I need to think about that. I also think it might be good to have a dedicated patch moving the locking mechanisms first, then introducing the new callbacks. I will discuss that with Mat on Wednesday. Do you mind holding new versions for this series until then please? Cheers, Matt -- Sponsored by the NGI0 Core fund.
© 2016 - 2025 Red Hat, Inc.