[PATCH mptcp-next 2/3] mptcp: implement .splice_eof

Geliang Tang posted 3 patches 1 week ago
There is a newer version of this series
[PATCH mptcp-next 2/3] mptcp: implement .splice_eof
Posted by Geliang Tang 1 week ago
From: Geliang Tang <tanggeliang@kylinos.cn>

This patch implements the .splice_eof interface for MPTCP, namely
mptcp_splice_eof(), which sequentially calls do_tcp_splice_eof() for
each subflow.

Suggested-by: Matthieu Baerts <matttbe@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/mptcp/protocol.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c88882062c40..5635d196cb9f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -4018,6 +4018,20 @@ static int mptcp_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
 	return 0;
 }
 
+static void mptcp_splice_eof(struct socket *sock)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = sock->sk, *ssk;
+
+	lock_sock(sk);
+	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+		ssk = mptcp_subflow_tcp_sock(subflow);
+
+		do_tcp_splice_eof(ssk);
+	}
+	release_sock(sk);
+}
+
 static struct proto mptcp_prot = {
 	.name		= "MPTCP",
 	.owner		= THIS_MODULE,
@@ -4049,6 +4063,7 @@ static struct proto mptcp_prot = {
 	.obj_size	= sizeof(struct mptcp_sock),
 	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
 	.no_autobind	= true,
+	.splice_eof	= mptcp_splice_eof,
 };
 
 static int mptcp_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
@@ -4540,6 +4555,7 @@ static const struct proto_ops mptcp_stream_ops = {
 	.set_rcvlowat	   = mptcp_set_rcvlowat,
 	.read_sock	   = mptcp_read_sock,
 	.splice_read	   = mptcp_splice_read,
+	.splice_eof	   = inet_splice_eof,
 };
 
 static struct inet_protosw mptcp_protosw = {
-- 
2.51.0
Re: [PATCH mptcp-next 2/3] mptcp: implement .splice_eof
Posted by Matthieu Baerts 1 week ago
Hi Geliang,

Thank you for looking at that!

On 02/02/2026 10:21, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> This patch implements the .splice_eof interface for MPTCP, namely
> mptcp_splice_eof(), which sequentially calls do_tcp_splice_eof() for
> each subflow.

Can you please explain what this hook is supposed to do / is used for?
And also why the solution is to call do_tcp_splice_eof() on each subflow?

Also, I'm a bit confused: why is this needed? Does it fix something or
is it a new feature or an optimisation?

> Suggested-by: Matthieu Baerts <matttbe@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>  net/mptcp/protocol.c | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index c88882062c40..5635d196cb9f 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -4018,6 +4018,20 @@ static int mptcp_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
>  	return 0;
>  }
>  
> +static void mptcp_splice_eof(struct socket *sock)
> +{
> +	struct mptcp_subflow_context *subflow;
> +	struct sock *sk = sock->sk, *ssk;
> +
> +	lock_sock(sk);
> +	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
> +		ssk = mptcp_subflow_tcp_sock(subflow);
> +
> +		do_tcp_splice_eof(ssk);

Is it fine to call this on closed subflows? e.g. if the initial subflow
has been closed. (I didn't check, maybe that's OK)

> +	}
> +	release_sock(sk);
> +}
> +
>  static struct proto mptcp_prot = {
>  	.name		= "MPTCP",
>  	.owner		= THIS_MODULE,
> @@ -4049,6 +4063,7 @@ static struct proto mptcp_prot = {
>  	.obj_size	= sizeof(struct mptcp_sock),
>  	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>  	.no_autobind	= true,
> +	.splice_eof	= mptcp_splice_eof,
>  };
>  
>  static int mptcp_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len)
> @@ -4540,6 +4555,7 @@ static const struct proto_ops mptcp_stream_ops = {
>  	.set_rcvlowat	   = mptcp_set_rcvlowat,
>  	.read_sock	   = mptcp_read_sock,
>  	.splice_read	   = mptcp_splice_read,
> +	.splice_eof	   = inet_splice_eof,

Is this line required? Will it not call inet_splice_eof() by default? (I
didn't check)

>  };
>  
>  static struct inet_protosw mptcp_protosw = {

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.
Re: [PATCH mptcp-next 2/3] mptcp: implement .splice_eof
Posted by Geliang Tang 1 week ago
Hi Matt,

On Mon, 2026-02-02 at 11:07 +0100, Matthieu Baerts wrote:
> Hi Geliang,
> 
> Thank you for looking at that!
> 
> On 02/02/2026 10:21, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > This patch implements the .splice_eof interface for MPTCP, namely
> > mptcp_splice_eof(), which sequentially calls do_tcp_splice_eof()
> > for
> > each subflow.
> 
> Can you please explain what this hook is supposed to do / used for
> please?

do_tcp_splice_eof() ensures that any remaining data in the TCP send
queue is flushed immediately when a sendfile() operation reaches end-
of-file (EOF).

> And also why the solution is to call do_tcp_splice_eof() on each
> subflow?

MPTCP operates over multiple TCP subflows. When splicing data through
an MPTCP socket, each subflow may have pending data in its send buffer
that needs to be properly finalized. So this patch calls
do_tcp_splice_eof() on each subflow.

> 
> Also, I'm a bit confused: why is this needed? Does it fix something
> or
> is it a new feature or an optimisation?

It is not a fix, but a new feature, to stay consistent with TCP: TCP
handles the splice EOF notification, but MPTCP did not.

> 
> > Suggested-by: Matthieu Baerts <matttbe@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >  net/mptcp/protocol.c | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> > 
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index c88882062c40..5635d196cb9f 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -4018,6 +4018,20 @@ static int mptcp_connect(struct sock *sk,
> > struct sockaddr_unsized *uaddr,
> >  	return 0;
> >  }
> >  
> > +static void mptcp_splice_eof(struct socket *sock)
> > +{
> > +	struct mptcp_subflow_context *subflow;
> > +	struct sock *sk = sock->sk, *ssk;
> > +
> > +	lock_sock(sk);
> > +	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
> > +		ssk = mptcp_subflow_tcp_sock(subflow);
> > +
> > +		do_tcp_splice_eof(ssk);
> 
> Is it fine to call this on closed subflows? e.g. if the initial
> subflow
> has been closed. (I didn't check, maybe that's OK)

Good point. I will add this check in v2:

       if (ssk->sk_state == TCP_CLOSE)
               continue;

> 
> > +	}
> > +	release_sock(sk);
> > +}
> > +
> >  static struct proto mptcp_prot = {
> >  	.name		= "MPTCP",
> >  	.owner		= THIS_MODULE,
> > @@ -4049,6 +4063,7 @@ static struct proto mptcp_prot = {
> >  	.obj_size	= sizeof(struct mptcp_sock),
> >  	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
> >  	.no_autobind	= true,
> > +	.splice_eof	= mptcp_splice_eof,
> >  };
> >  
> >  static int mptcp_bind(struct socket *sock, struct sockaddr_unsized
> > *uaddr, int addr_len)
> > @@ -4540,6 +4555,7 @@ static const struct proto_ops
> > mptcp_stream_ops = {
> >  	.set_rcvlowat	   = mptcp_set_rcvlowat,
> >  	.read_sock	   = mptcp_read_sock,
> >  	.splice_read	   = mptcp_splice_read,
> > +	.splice_eof	   = inet_splice_eof,
> 
> Is this line required? Will it not call inet_splice_eof() by default?
> (I
> didn't check)

Yes, this is required.

sock_splice_eof() needs to call the .splice_eof interface from struct
proto_ops. To maintain consistency with regular TCP behavior, the 
.splice_eof interface of mptcp_stream_ops is set to inet_splice_eof
too. inet_splice_eof() will switch to the protocol-specific
implementation (sk->sk_prot->splice_eof), which for MPTCP is
mptcp_splice_eof().

Thanks,
-Geliang

> 
> >  };
> >  
> >  static struct inet_protosw mptcp_protosw = {
> 
> Cheers,
> Matt
Re: [PATCH mptcp-next 2/3] mptcp: implement .splice_eof
Posted by Matthieu Baerts 6 days, 22 hours ago
Hi Geliang,

Thank you for your reply!

On 03/02/2026 07:36, Geliang Tang wrote:
> Hi Matt,
> 
> On Mon, 2026-02-02 at 11:07 +0100, Matthieu Baerts wrote:
>> Hi Geliang,
>>
>> Thank you for looking at that!
>>
>> On 02/02/2026 10:21, Geliang Tang wrote:
>>> From: Geliang Tang <tanggeliang@kylinos.cn>
>>>
>>> This patch implements the .splice_eof interface for MPTCP, namely
>>> mptcp_splice_eof(), which sequentially calls do_tcp_splice_eof()
>>> for
>>> each subflow.
>>
>> Can you please explain what this hook is supposed to do / used for
>> please?
> 
> do_tcp_splice_eof() ensures that any remaining data in the TCP send
> queue is flushed immediately when a sendfile() operation reaches end-
> of-file (EOF).

OK, so if I understand correctly, it means that without .splice_eof()
support, the queue is not flushed immediately when a sendfile()
operation reaches end-of-file (EOF). But that's OK: it will be flushed
eventually, with a small delay, and the most important thing is that
all data will be transferred. Is that correct?

If it is, can you please reflect that in the commit message?
I think it is essential to mention it is not linked to the 'splice()'
syscall, it is an improvement, and nothing was broken before.

>> And also why the solution is to call do_tcp_splice_eof() on each
>> subflow?
> 
> MPTCP operates over multiple TCP subflows. When splicing data through
> an MPTCP socket, each subflow may have pending data in its send buffer
> that needs to be properly finalized. So here calls do_tcp_splice_eof()
> on each subflow.

Can you also please add a note about that in the commit message?

>> Also, I'm a bit confused: why is this needed? Does it fix something
>> or
>> is it a new feature or an optimisation?
> 
> It is not a fix, but a new feature, to keep consistent with TCP. Since
> TCP handles the splice EOF notification but MPTCP didn't.
> 
>>
>>> Suggested-by: Matthieu Baerts <matttbe@kernel.org>
>>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
>>> ---
>>>  net/mptcp/protocol.c | 16 ++++++++++++++++
>>>  1 file changed, 16 insertions(+)
>>>
>>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>>> index c88882062c40..5635d196cb9f 100644
>>> --- a/net/mptcp/protocol.c
>>> +++ b/net/mptcp/protocol.c
>>> @@ -4018,6 +4018,20 @@ static int mptcp_connect(struct sock *sk,
>>> struct sockaddr_unsized *uaddr,
>>>  	return 0;
>>>  }
>>>  
>>> +static void mptcp_splice_eof(struct socket *sock)
>>> +{
>>> +	struct mptcp_subflow_context *subflow;
>>> +	struct sock *sk = sock->sk, *ssk;
>>> +
>>> +	lock_sock(sk);
>>> +	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
>>> +		ssk = mptcp_subflow_tcp_sock(subflow);
>>> +
>>> +		do_tcp_splice_eof(ssk);
>>
>> Is it fine to call this on closed subflows? e.g. if the initial
>> subflow
>> has been closed. (I didn't check, maybe that's OK)
> 
> Good point. I will add this check in v2:
> 
>        if (ssk->sk_state == TCP_CLOSE)
>                continue;
> 
>>
>>> +	}
>>> +	release_sock(sk);
>>> +}
>>> +
>>>  static struct proto mptcp_prot = {
>>>  	.name		= "MPTCP",
>>>  	.owner		= THIS_MODULE,
>>> @@ -4049,6 +4063,7 @@ static struct proto mptcp_prot = {
>>>  	.obj_size	= sizeof(struct mptcp_sock),
>>>  	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>>>  	.no_autobind	= true,
>>> +	.splice_eof	= mptcp_splice_eof,
>>>  };
>>>  
>>>  static int mptcp_bind(struct socket *sock, struct sockaddr_unsized
>>> *uaddr, int addr_len)
>>> @@ -4540,6 +4555,7 @@ static const struct proto_ops
>>> mptcp_stream_ops = {
>>>  	.set_rcvlowat	   = mptcp_set_rcvlowat,
>>>  	.read_sock	   = mptcp_read_sock,
>>>  	.splice_read	   = mptcp_splice_read,
>>> +	.splice_eof	   = inet_splice_eof,
>>
>> Is this line required? Will it not call inet_splice_eof() by default?
>> (I
>> didn't check)
> 
> Yes, this is required.
> 
> sock_splice_eof() needs to call the .splice_eof interface from struct
> proto_ops. To maintain consistency with regular TCP behavior, the 
> .splice_eof interface of mptcp_stream_ops is set to inet_splice_eof
> too. inet_splice_eof() will switch to the protocol-specific
> implementation (sk->sk_prot->splice_eof), which for MPTCP is
> mptcp_splice_eof().

OK. Then be careful that inet_splice_eof() will call inet_send_prepare()
which will call sock_rps_record_flow(sk). mptcp_rps_record_subflows()
should be called on each subflow, probably from mptcp_splice_eof().

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.