[PATCH mptcp-net 3/3] mptcp: fix memcg accounting for passive sockets

Paolo Abeni posted 3 patches 2 weeks, 5 days ago
[PATCH mptcp-net 3/3] mptcp: fix memcg accounting for passive sockets
Posted by Paolo Abeni 2 weeks, 5 days ago
The passive sockets never got proper memcg accounting: the msk
socket is associated with the memcg at accept time, but the
passive subflows never got it right.

At accept time, traverse the subflows list and associate each of them
with the msk memcg, and try to do the same at join completion time, if
the msk has been already accepted.

Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming connections")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/298
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/597
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++----------
 net/mptcp/protocol.h |  1 +
 net/mptcp/subflow.c  | 10 ++++++++++
 3 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 34e6bc731085..5e9325c7ea9c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -959,6 +959,8 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 	mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
 	mptcp_sockopt_sync_locked(msk, ssk);
 	mptcp_stop_tout_timer(sk);
+	__mptcp_inherit_cgrp_data(sk, ssk);
+	__mptcp_inherit_memcg(sk, ssk, GFP_ATOMIC);
 	__mptcp_propagate_sndbuf(sk, ssk);
 	__mptcp_propagate_rcvspace(sk, ssk);
 	return true;
@@ -4076,6 +4078,29 @@ static int mptcp_listen(struct socket *sock, int backlog)
 	return err;
 }
 
+static void mptcp_graph_subflows(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		bool slow;
+
+		slow = lock_sock_fast(ssk);
+
+		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
+		 * This is needed so NOSPACE flag can be set from tcp stack.
+		 */
+		if (!ssk->sk_socket)
+			mptcp_sock_graft(ssk, sk->sk_socket);
+
+		__mptcp_inherit_cgrp_data(sk, ssk);
+		__mptcp_inherit_memcg(sk, ssk, GFP_KERNEL);
+		unlock_sock_fast(ssk, slow);
+	}
+}
+
 static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 			       struct proto_accept_arg *arg)
 {
@@ -4123,16 +4148,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 		msk = mptcp_sk(newsk);
 		msk->in_accept_queue = 0;
 
-		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
-		 * This is needed so NOSPACE flag can be set from tcp stack.
-		 */
-		mptcp_for_each_subflow(msk, subflow) {
-			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
-			if (!ssk->sk_socket)
-				mptcp_sock_graft(ssk, newsock);
-		}
-
+		mptcp_graph_subflows(newsk);
 		mptcp_rps_record_subflows(msk);
 
 		/* Do late cleanup for the first subflow as necessary. Also
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 300ac7030958..60bfe50530dc 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -755,6 +755,7 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
 	return ret;
 }
 
+void __mptcp_inherit_memcg(struct sock *sk, struct sock *ssk, gfp_t gfp);
 void __mptcp_inherit_cgrp_data(struct sock *sk, struct sock *ssk);
 
 int mptcp_is_enabled(const struct net *net);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 599a21d92590..39bf69e73975 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1721,6 +1721,16 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
 	return err;
 }
 
+void __mptcp_inherit_memcg(struct sock *sk, struct sock *ssk, gfp_t gfp)
+{
+	/* Only if the msk has been accepted already (and not orphaned).*/
+	if (!mem_cgroup_sockets_enabled || !sk->sk_socket)
+		return;
+
+	mem_cgroup_sk_inherit(sk, ssk);
+	__sk_charge(ssk, gfp);
+}
+
 void __mptcp_inherit_cgrp_data(struct sock *sk, struct sock *ssk)
 {
 #ifdef CONFIG_SOCK_CGROUP_DATA
-- 
2.51.0
Re: [PATCH mptcp-net 3/3] mptcp: fix memcg accounting for passive sockets
Posted by Geliang Tang 2 weeks, 2 days ago
Hi Paolo,

Thanks for this fix.

On Fri, 2025-11-07 at 22:55 +0100, Paolo Abeni wrote:
> The passive sockets never got proper memcg accounting: the msk
> socket is associated with the memcg at accept time, but the
> passive subflows never got it right.
> 
> At accept time, traverse the subflows list and associate each of them
> with the msk memcg, and try to do the same at join completion time,
> if
> the msk has been already accepted.
> 
> Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming
> connections")
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/298
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/597
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
>  net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++----------
>  net/mptcp/protocol.h |  1 +
>  net/mptcp/subflow.c  | 10 ++++++++++
>  3 files changed, 37 insertions(+), 10 deletions(-)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 34e6bc731085..5e9325c7ea9c 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -959,6 +959,8 @@ static bool __mptcp_finish_join(struct mptcp_sock
> *msk, struct sock *ssk)
>  	mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
>  	mptcp_sockopt_sync_locked(msk, ssk);
>  	mptcp_stop_tout_timer(sk);
> +	__mptcp_inherit_cgrp_data(sk, ssk);
> +	__mptcp_inherit_memcg(sk, ssk, GFP_ATOMIC);

In mptcp_graph_subflows(), these two lines already have the
mem_cgroup_from_sk() check (in the squash-to patch). It's unclear
whether mem_cgroup_from_sk() check needs to be added here too.

Thanks,
-Geliang

>  	__mptcp_propagate_sndbuf(sk, ssk);
>  	__mptcp_propagate_rcvspace(sk, ssk);
>  	return true;
> @@ -4076,6 +4078,29 @@ static int mptcp_listen(struct socket *sock,
> int backlog)
>  	return err;
>  }
>  
> +static void mptcp_graph_subflows(struct sock *sk)
> +{
> +	struct mptcp_subflow_context *subflow;
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +
> +	mptcp_for_each_subflow(msk, subflow) {
> +		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
> +		bool slow;
> +
> +		slow = lock_sock_fast(ssk);
> +
> +		/* set ssk->sk_socket of accept()ed flows to mptcp
> socket.
> +		 * This is needed so NOSPACE flag can be set from
> tcp stack.
> +		 */
> +		if (!ssk->sk_socket)
> +			mptcp_sock_graft(ssk, sk->sk_socket);
> +
> +		__mptcp_inherit_cgrp_data(sk, ssk);
> +		__mptcp_inherit_memcg(sk, ssk, GFP_KERNEL);
> +		unlock_sock_fast(ssk, slow);
> +	}
> +}
> +
>  static int mptcp_stream_accept(struct socket *sock, struct socket
> *newsock,
>  			       struct proto_accept_arg *arg)
>  {
> @@ -4123,16 +4148,7 @@ static int mptcp_stream_accept(struct socket
> *sock, struct socket *newsock,
>  		msk = mptcp_sk(newsk);
>  		msk->in_accept_queue = 0;
>  
> -		/* set ssk->sk_socket of accept()ed flows to mptcp
> socket.
> -		 * This is needed so NOSPACE flag can be set from
> tcp stack.
> -		 */
> -		mptcp_for_each_subflow(msk, subflow) {
> -			struct sock *ssk =
> mptcp_subflow_tcp_sock(subflow);
> -
> -			if (!ssk->sk_socket)
> -				mptcp_sock_graft(ssk, newsock);
> -		}
> -
> +		mptcp_graph_subflows(newsk);
>  		mptcp_rps_record_subflows(msk);
>  
>  		/* Do late cleanup for the first subflow as
> necessary. Also
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 300ac7030958..60bfe50530dc 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -755,6 +755,7 @@ mptcp_subflow_delegated_next(struct
> mptcp_delegated_action *delegated)
>  	return ret;
>  }
>  
> +void __mptcp_inherit_memcg(struct sock *sk, struct sock *ssk, gfp_t
> gfp);
>  void __mptcp_inherit_cgrp_data(struct sock *sk, struct sock *ssk);
>  
>  int mptcp_is_enabled(const struct net *net);
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 599a21d92590..39bf69e73975 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -1721,6 +1721,16 @@ int __mptcp_subflow_connect(struct sock *sk,
> const struct mptcp_pm_local *local,
>  	return err;
>  }
>  
> +void __mptcp_inherit_memcg(struct sock *sk, struct sock *ssk, gfp_t
> gfp)
> +{
> +	/* Only if the msk has been accepted already (and not
> orphaned).*/
> +	if (!mem_cgroup_sockets_enabled || !sk->sk_socket)
> +		return;
> +
> +	mem_cgroup_sk_inherit(sk, ssk);
> +	__sk_charge(ssk, gfp);
> +}
> +
>  void __mptcp_inherit_cgrp_data(struct sock *sk, struct sock *ssk)
>  {
>  #ifdef CONFIG_SOCK_CGROUP_DATA

Re: [PATCH mptcp-net 3/3] mptcp: fix memcg accounting for passive sockets
Posted by Matthieu Baerts 2 weeks, 1 day ago
Hi Geliang,

On 11/11/2025 09:37, Geliang Tang wrote:
> Hi Paolo,
> 
> Thanks for this fix.
> 
> On Fri, 2025-11-07 at 22:55 +0100, Paolo Abeni wrote:
>> The passive sockets never got proper memcg accounting: the msk
>> socket is associated with the memcg at accept time, but the
>> passive subflows never got it right.
>>
>> At accept time, traverse the subflows list and associate each of them
>> with the msk memcg, and try to do the same at join completion time,
>> if
>> the msk has been already accepted.
>>
>> Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming
>> connections")
>> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/298
>> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/597
>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>> ---
>>  net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++----------
>>  net/mptcp/protocol.h |  1 +
>>  net/mptcp/subflow.c  | 10 ++++++++++
>>  3 files changed, 37 insertions(+), 10 deletions(-)
>>
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index 34e6bc731085..5e9325c7ea9c 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -959,6 +959,8 @@ static bool __mptcp_finish_join(struct mptcp_sock
>> *msk, struct sock *ssk)
>>  	mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
>>  	mptcp_sockopt_sync_locked(msk, ssk);
>>  	mptcp_stop_tout_timer(sk);
>> +	__mptcp_inherit_cgrp_data(sk, ssk);
>> +	__mptcp_inherit_memcg(sk, ssk, GFP_ATOMIC);
> 
> In mptcp_graph_subflows(), these two lines already have the
> mem_cgroup_from_sk() check (in the squash-to patch). It's unclear
> whether mem_cgroup_from_sk() check needs to be added here too.

From what I understood, no need to check mem_cgroup_from_sk() here:

- __mptcp_inherit_cgrp_data() should not depend on that (I think)
- __mptcp_inherit_memcg() is doing that via mem_cgroup_sockets_enabled()

I *think* the squash-to patch needs some small further modifications,
but this series looks good to me.

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

Re: [PATCH mptcp-net 3/3] mptcp: fix memcg accounting for passive sockets
Posted by Paolo Abeni 2 weeks, 1 day ago
On 11/11/25 6:25 PM, Matthieu Baerts wrote:
> On 11/11/2025 09:37, Geliang Tang wrote:
>> Hi Paolo,
>>
>> Thanks for this fix.
>>
>> On Fri, 2025-11-07 at 22:55 +0100, Paolo Abeni wrote:
>>> The passive sockets never got proper memcg accounting: the msk
>>> socket is associated with the memcg at accept time, but the
>>> passive subflows never got it right.
>>>
>>> At accept time, traverse the subflows list and associate each of them
>>> with the msk memcg, and try to do the same at join completion time,
>>> if
>>> the msk has been already accepted.
>>>
>>> Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming
>>> connections")
>>> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/298
>>> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/597
>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>>> ---
>>>  net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++----------
>>>  net/mptcp/protocol.h |  1 +
>>>  net/mptcp/subflow.c  | 10 ++++++++++
>>>  3 files changed, 37 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>>> index 34e6bc731085..5e9325c7ea9c 100644
>>> --- a/net/mptcp/protocol.c
>>> +++ b/net/mptcp/protocol.c
>>> @@ -959,6 +959,8 @@ static bool __mptcp_finish_join(struct mptcp_sock
>>> *msk, struct sock *ssk)
>>>  	mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
>>>  	mptcp_sockopt_sync_locked(msk, ssk);
>>>  	mptcp_stop_tout_timer(sk);
>>> +	__mptcp_inherit_cgrp_data(sk, ssk);
>>> +	__mptcp_inherit_memcg(sk, ssk, GFP_ATOMIC);
>>
>> In mptcp_graph_subflows(), these two lines already have the
>> mem_cgroup_from_sk() check (in the squash-to patch). It's unclear
>> whether mem_cgroup_from_sk() check needs to be added here too.
> 
> From what I understood, no need to check mem_cgroup_from_sk() here:
> 
> - __mptcp_inherit_cgrp_data() should not depend on that (I think)
> - __mptcp_inherit_memcg() is doing that via mem_cgroup_sockets_enabled()

The mem_cgroup_sockets_enabled() check is optional, mainly used a
performance optimization, as it will cut the `than` branch via static
branch.

To keep the code small I chose to avoid the optimization here, but still
kept it in mptcp_graph_subflows() because there is much more stuff to be
avoided (a whole loop with per subflow lock pair).

/P