From: Geliang Tang <tanggeliang@kylinos.cn>
This patch implements the .splice_read interface of the MPTCP struct
proto_ops as mptcp_splice_read(), modeled on tcp_splice_read().
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/protocol.c | 136 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 136 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index fc429d175ede..4638d4be2b98 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -4023,6 +4023,140 @@ static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
return copied;
}
+/*
+ * MPTCP splice context
+ */
+struct mptcp_splice_state {
+ struct pipe_inode_info *pipe;
+ size_t len;
+ unsigned int flags;
+};
+
+static int mptcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
+ unsigned int offset, size_t len)
+{
+ struct mptcp_splice_state *mss = rd_desc->arg.data;
+ int ret;
+
+ ret = skb_splice_bits(skb, skb->sk, offset, mss->pipe,
+ min(rd_desc->count, len), mss->flags);
+ if (ret > 0)
+ rd_desc->count -= ret;
+ return ret;
+}
+
+static int __mptcp_splice_read(struct sock *sk, struct mptcp_splice_state *mss)
+{
+ /* Store MPTCP splice context information in read_descriptor_t. */
+ read_descriptor_t rd_desc = {
+ .arg.data = mss,
+ .count = mss->len,
+ };
+
+ return mptcp_read_sock(sk, &rd_desc, mptcp_splice_data_recv);
+}
+
+/**
+ * mptcp_splice_read - splice data from MPTCP socket to a pipe
+ * @sock: socket to splice from
+ * @ppos: position (not valid)
+ * @pipe: pipe to splice to
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * Description:
+ * Will read pages from given socket and fill them into a pipe.
+ *
+ **/
+static ssize_t mptcp_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct mptcp_splice_state mss = {
+ .pipe = pipe,
+ .len = len,
+ .flags = flags,
+ };
+ struct sock *sk = sock->sk;
+ ssize_t spliced;
+ long timeo;
+ int ret;
+
+ /*
+ * We can't seek on a socket input
+ */
+ if (unlikely(*ppos))
+ return -ESPIPE;
+
+ spliced = 0;
+ ret = 0;
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
+ while (mss.len) {
+ ret = __mptcp_splice_read(sk, &mss);
+ if (ret < 0) {
+ break;
+ } else if (!ret) {
+ if (spliced)
+ break;
+ if (sock_flag(sk, SOCK_DONE))
+ break;
+ if (sk->sk_err) {
+ ret = sock_error(sk);
+ break;
+ }
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
+ if (__mptcp_move_skbs(sk))
+ continue;
+ break;
+ }
+ if (sk->sk_state == TCP_CLOSE) {
+ ret = -ENOTCONN;
+ break;
+ }
+ if (!timeo) {
+ ret = -EAGAIN;
+ break;
+ }
+ /* if __mptcp_splice_read() got nothing while we have
+ * an skb in receive queue, we do not want to loop.
+ * This might happen with URG data.
+ */
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ break;
+ ret = sk_wait_data(sk, &timeo, NULL);
+ if (ret < 0)
+ break;
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(timeo);
+ break;
+ }
+ continue;
+ }
+ mss.len -= ret;
+ spliced += ret;
+
+ if (!mss.len || !timeo)
+ break;
+ release_sock(sk);
+ lock_sock(sk);
+
+ if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ signal_pending(current))
+ break;
+ }
+
+ release_sock(sk);
+
+ if (spliced)
+ return spliced;
+
+ return ret;
+}
+
static const struct proto_ops mptcp_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
@@ -4044,6 +4178,7 @@ static const struct proto_ops mptcp_stream_ops = {
.mmap = sock_no_mmap,
.set_rcvlowat = mptcp_set_rcvlowat,
.read_sock = mptcp_read_sock,
+ .splice_read = mptcp_splice_read,
};
static struct inet_protosw mptcp_protosw = {
@@ -4149,6 +4284,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
#endif
.set_rcvlowat = mptcp_set_rcvlowat,
.read_sock = mptcp_read_sock,
+ .splice_read = mptcp_splice_read,
};
static struct proto mptcp_v6_prot;
--
2.48.1
On 7/7/25 11:34 AM, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> This patch implements .splice_read interface of mptcp struct proto_ops
> as mptcp_splice_read() with reference to tcp_splice_read().
>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
> net/mptcp/protocol.c | 136 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 136 insertions(+)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index fc429d175ede..4638d4be2b98 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -4023,6 +4023,140 @@ static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> return copied;
> }
>
> +/*
> + * MPTCP splice context
> + */
> +struct mptcp_splice_state {
> + struct pipe_inode_info *pipe;
> + size_t len;
> + unsigned int flags;
> +};
> +
> +static int mptcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
> + unsigned int offset, size_t len)
> +{
> + struct mptcp_splice_state *mss = rd_desc->arg.data;
> + int ret;
> +
> + ret = skb_splice_bits(skb, skb->sk, offset, mss->pipe,
> + min(rd_desc->count, len), mss->flags);
> + if (ret > 0)
> + rd_desc->count -= ret;
> + return ret;
> +}
I have mixed feelings WRT the above. I'm wondering if we should reuse the
same code already existing in TCP, moving the tcp_splice_state definition
to some shared header and making tcp_splice_data_recv() non-static.
> +static int __mptcp_splice_read(struct sock *sk, struct mptcp_splice_state *mss)
> +{
> + /* Store MPTCP splice context information in read_descriptor_t. */
> + read_descriptor_t rd_desc = {
> + .arg.data = mss,
> + .count = mss->len,
> + };
> +
> + return mptcp_read_sock(sk, &rd_desc, mptcp_splice_data_recv);
> +}
> +
> +/**
> + * mptcp_splice_read - splice data from MPTCP socket to a pipe
> + * @sock: socket to splice from
> + * @ppos: position (not valid)
> + * @pipe: pipe to splice to
> + * @len: number of bytes to splice
> + * @flags: splice modifier flags
> + *
> + * Description:
> + * Will read pages from given socket and fill them into a pipe.
> + *
> + **/
> +static ssize_t mptcp_splice_read(struct socket *sock, loff_t *ppos,
> + struct pipe_inode_info *pipe, size_t len,
> + unsigned int flags)
> +{
> + struct mptcp_splice_state mss = {
> + .pipe = pipe,
> + .len = len,
> + .flags = flags,
> + };
> + struct sock *sk = sock->sk;
> + ssize_t spliced;
> + long timeo;
> + int ret;
> +
> + /*
> + * We can't seek on a socket input
> + */
> + if (unlikely(*ppos))
> + return -ESPIPE;
> +
> + spliced = 0;
> + ret = 0;
> +
> + lock_sock(sk);
> +
> + timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
> + while (mss.len) {
> + ret = __mptcp_splice_read(sk, &mss);
> + if (ret < 0) {
> + break;
> + } else if (!ret) {
> + if (spliced)
> + break;
> + if (sock_flag(sk, SOCK_DONE))
> + break;
> + if (sk->sk_err) {
> + ret = sock_error(sk);
> + break;
> + }
> + if (sk->sk_shutdown & RCV_SHUTDOWN) {
> + if (__mptcp_move_skbs(sk))
> + continue;
> + break;
> + }
> + if (sk->sk_state == TCP_CLOSE) {
> + ret = -ENOTCONN;
> + break;
> + }
> + if (!timeo) {
> + ret = -EAGAIN;
> + break;
> + }
> + /* if __mptcp_splice_read() got nothing while we have
> + * an skb in receive queue, we do not want to loop.
> + * This might happen with URG data.
> + */
> + if (!skb_queue_empty(&sk->sk_receive_queue))
> + break;
> + ret = sk_wait_data(sk, &timeo, NULL);
> + if (ret < 0)
> + break;
> + if (signal_pending(current)) {
> + ret = sock_intr_errno(timeo);
> + break;
> + }
I think that moving the above if statement before the queue-empty check
will not change the overall behavior.
With that in place you could factor out a
bool mptcp_recv_should_stop(struct sock *sk, int err)
helper from mptcp_recvmsg() and use it verbatim both in
mptcp_recvmsg() and here.
Side note: suggestions for a better helper name welcome!
/P
Hi Paolo,
On Tue, 2025-07-08 at 16:52 +0200, Paolo Abeni wrote:
> On 7/7/25 11:34 AM, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> >
> > This patch implements .splice_read interface of mptcp struct
> > proto_ops
> > as mptcp_splice_read() with reference to tcp_splice_read().
> >
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> > net/mptcp/protocol.c | 136
> > +++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 136 insertions(+)
> >
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index fc429d175ede..4638d4be2b98 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -4023,6 +4023,140 @@ static int mptcp_read_sock(struct sock *sk,
> > read_descriptor_t *desc,
> > return copied;
> > }
> >
> > +/*
> > + * MPTCP splice context
> > + */
> > +struct mptcp_splice_state {
> > + struct pipe_inode_info *pipe;
> > + size_t len;
> > + unsigned int flags;
> > +};
> > +
> > +static int mptcp_splice_data_recv(read_descriptor_t *rd_desc,
> > struct sk_buff *skb,
> > + unsigned int offset, size_t len)
> > +{
> > + struct mptcp_splice_state *mss = rd_desc->arg.data;
> > + int ret;
> > +
> > + ret = skb_splice_bits(skb, skb->sk, offset, mss->pipe,
> > + min(rd_desc->count, len), mss-
> > >flags);
> > + if (ret > 0)
> > + rd_desc->count -= ret;
> > + return ret;
> > +}
>
> I have mixed feelings WRT the above. I'm wondering if we should reuse
> the
> same code already existing in TCP, moving the tcp_splice_state definition
> to
> some shared header and making tcp_splice_data_recv() non-static.
>
> > +static int __mptcp_splice_read(struct sock *sk, struct
> > mptcp_splice_state *mss)
> > +{
> > + /* Store MPTCP splice context information in
> > read_descriptor_t. */
> > + read_descriptor_t rd_desc = {
> > + .arg.data = mss,
> > + .count = mss->len,
> > + };
> > +
> > + return mptcp_read_sock(sk, &rd_desc,
> > mptcp_splice_data_recv);
> > +}
> > +
> > +/**
> > + * mptcp_splice_read - splice data from MPTCP socket to a pipe
> > + * @sock: socket to splice from
> > + * @ppos: position (not valid)
> > + * @pipe: pipe to splice to
> > + * @len: number of bytes to splice
> > + * @flags: splice modifier flags
> > + *
> > + * Description:
> > + * Will read pages from given socket and fill them into a pipe.
> > + *
> > + **/
> > +static ssize_t mptcp_splice_read(struct socket *sock, loff_t
> > *ppos,
> > + struct pipe_inode_info *pipe,
> > size_t len,
> > + unsigned int flags)
> > +{
> > + struct mptcp_splice_state mss = {
> > + .pipe = pipe,
> > + .len = len,
> > + .flags = flags,
> > + };
> > + struct sock *sk = sock->sk;
> > + ssize_t spliced;
> > + long timeo;
> > + int ret;
> > +
> > + /*
> > + * We can't seek on a socket input
> > + */
> > + if (unlikely(*ppos))
> > + return -ESPIPE;
> > +
> > + spliced = 0;
> > + ret = 0;
> > +
> > + lock_sock(sk);
> > +
> > + timeo = sock_rcvtimeo(sk, sock->file->f_flags &
> > O_NONBLOCK);
> > + while (mss.len) {
> > + ret = __mptcp_splice_read(sk, &mss);
> > + if (ret < 0) {
> > + break;
> > + } else if (!ret) {
> > + if (spliced)
> > + break;
> > + if (sock_flag(sk, SOCK_DONE))
> > + break;
I noticed that this SOCK_DONE flag is checked in tcp_recvmsg_locked()
but not in mptcp_recvmsg(). I wonder if this flag should be checked in
mptcp_recvmsg() too.
> > + if (sk->sk_err) {
> > + ret = sock_error(sk);
> > + break;
> > + }
> > + if (sk->sk_shutdown & RCV_SHUTDOWN) {
> > + if (__mptcp_move_skbs(sk))
> > + continue;
> > + break;
> > + }
> > + if (sk->sk_state == TCP_CLOSE) {
> > + ret = -ENOTCONN;
> > + break;
> > + }
> > + if (!timeo) {
> > + ret = -EAGAIN;
> > + break;
> > + }
> > + /* if __mptcp_splice_read() got nothing
> > while we have
> > + * an skb in receive queue, we do not want
> > to loop.
> > + * This might happen with URG data.
> > + */
> > + if (!skb_queue_empty(&sk-
> > >sk_receive_queue))
> > + break;
> > + ret = sk_wait_data(sk, &timeo, NULL);
> > + if (ret < 0)
> > + break;
> > + if (signal_pending(current)) {
> > + ret = sock_intr_errno(timeo);
> > + break;
> > + }
>
> I think that moving the above if statement before the queue empty
> check
> will not change the overall behavior.
>
> With that in place you could factor out an
>
> bool mptcp_recv_should_stop(struct sock *sk, int err)
This helper could also be used in tcp_recvmsg_locked() and
tcp_splice_read(). What about renaming it to tcp_recv_should_stop(), adding
it to include/net/tcp.h, and using it for both TCP and MPTCP? WDYT?
Thanks,
-Geliang
>
> helper from mptcp_recvmsg() and use it verbatim in both in
> mptcp_recvmsg() and here.
>
> Side note: suggestions for a better helper name welcome!
>
> /P
>
© 2016 - 2026 Red Hat, Inc.