From: Geliang Tang <tanggeliang@kylinos.cn>
This patch implements the .splice_read interface of the MPTCP struct proto_ops
as mptcp_splice_read(), modeled on tcp_splice_read().
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/protocol.c | 136 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 136 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index fc429d175ede..4638d4be2b98 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -4023,6 +4023,140 @@ static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
return copied;
}
+/*
+ * MPTCP splice context
+ *
+ * Carries the parameters of one mptcp_splice_read() call down to the
+ * per-skb callback, which retrieves it from read_descriptor_t::arg.data.
+ */
+struct mptcp_splice_state {
+ struct pipe_inode_info *pipe; /* pipe the data is spliced into */
+ size_t len; /* bytes still to be spliced */
+ unsigned int flags; /* splice modifier flags, handed to skb_splice_bits() */
+};
+
+/*
+ * Per-skb callback handed to mptcp_read_sock() by __mptcp_splice_read().
+ *
+ * Splices data from @skb, starting at @offset, into the pipe stored in the
+ * splice context found in @rd_desc->arg.data. At most @len bytes are
+ * requested, and never more than the remaining budget in @rd_desc->count.
+ *
+ * Returns the skb_splice_bits() result unchanged.
+ */
+static int mptcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
+ unsigned int offset, size_t len)
+{
+ struct mptcp_splice_state *mss = rd_desc->arg.data;
+ int ret;
+
+ /* Cap the request to what the caller still wants to splice. */
+ ret = skb_splice_bits(skb, skb->sk, offset, mss->pipe,
+ min(rd_desc->count, len), mss->flags);
+ /* Only a positive result consumes budget; anything else is passed up as-is. */
+ if (ret > 0)
+ rd_desc->count -= ret;
+ return ret;
+}
+
+/*
+ * Run one mptcp_read_sock() pass over @sk, using mptcp_splice_data_recv()
+ * as the per-skb callback.
+ *
+ * Returns whatever mptcp_read_sock() returns.
+ */
+static int __mptcp_splice_read(struct sock *sk, struct mptcp_splice_state *mss)
+{
+ /* Store MPTCP splice context information in read_descriptor_t.
+ * The callback reads the context back from .arg.data and uses .count,
+ * initialised to the bytes still wanted, as its remaining budget.
+ */
+ read_descriptor_t rd_desc = {
+ .arg.data = mss,
+ .count = mss->len,
+ };
+
+ return mptcp_read_sock(sk, &rd_desc, mptcp_splice_data_recv);
+}
+
+/**
+ * mptcp_splice_read - splice data from MPTCP socket to a pipe
+ * @sock: socket to splice from
+ * @ppos: position (not valid)
+ * @pipe: pipe to splice to
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * Description:
+ * Will read pages from given socket and fill them into a pipe.
+ *
+ * Return:
+ * The number of bytes spliced when at least one byte was spliced.
+ * Otherwise the last status: 0 when the loop stopped without an error
+ * (e.g. @len is 0, SOCK_DONE is set, or RCV_SHUTDOWN is set and
+ * __mptcp_move_skbs() returned zero), or an error: -ESPIPE for a
+ * non-zero *@ppos, -ENOTCONN when the socket is in TCP_CLOSE, -EAGAIN
+ * when the receive timeout is zero, or the value reported by
+ * sock_error(), sock_intr_errno(), sk_wait_data() or
+ * __mptcp_splice_read().
+ *
+ **/
+static ssize_t mptcp_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct mptcp_splice_state mss = {
+ .pipe = pipe,
+ .len = len,
+ .flags = flags,
+ };
+ struct sock *sk = sock->sk;
+ ssize_t spliced;
+ long timeo;
+ int ret;
+
+ /*
+ * We can't seek on a socket input
+ */
+ if (unlikely(*ppos))
+ return -ESPIPE;
+
+ spliced = 0;
+ ret = 0;
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
+ /* Loop until the requested length is consumed or a stop condition hits. */
+ while (mss.len) {
+ ret = __mptcp_splice_read(sk, &mss);
+ if (ret < 0) {
+ break;
+ } else if (!ret) {
+ /* Nothing spliced in this pass: decide whether to stop or wait.
+ * If earlier passes already moved data, report that instead.
+ */
+ if (spliced)
+ break;
+ if (sock_flag(sk, SOCK_DONE))
+ break;
+ if (sk->sk_err) {
+ ret = sock_error(sk);
+ break;
+ }
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
+ /* Retry when __mptcp_move_skbs() returns non-zero
+ * (presumably more data became readable - TODO confirm).
+ */
+ if (__mptcp_move_skbs(sk))
+ continue;
+ break;
+ }
+ if (sk->sk_state == TCP_CLOSE) {
+ ret = -ENOTCONN;
+ break;
+ }
+ /* A zero timeout means we must not wait for data. */
+ if (!timeo) {
+ ret = -EAGAIN;
+ break;
+ }
+ /* if __mptcp_splice_read() got nothing while we have
+ * an skb in receive queue, we do not want to loop.
+ * This might happen with URG data.
+ */
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ break;
+ ret = sk_wait_data(sk, &timeo, NULL);
+ if (ret < 0)
+ break;
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(timeo);
+ break;
+ }
+ continue;
+ }
+ /* Account for the bytes moved by this pass. */
+ mss.len -= ret;
+ spliced += ret;
+
+ if (!mss.len || !timeo)
+ break;
+ /* Drop and re-take the socket lock between passes
+ * (presumably to let pending socket work run - TODO confirm).
+ */
+ release_sock(sk);
+ lock_sock(sk);
+
+ /* Re-check the socket state, which may have changed while unlocked. */
+ if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ signal_pending(current))
+ break;
+ }
+
+ release_sock(sk);
+
+ /* Partial progress takes precedence over the last status. */
+ if (spliced)
+ return spliced;
+
+ return ret;
+}
+
static const struct proto_ops mptcp_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
@@ -4044,6 +4178,7 @@ static const struct proto_ops mptcp_stream_ops = {
.mmap = sock_no_mmap,
.set_rcvlowat = mptcp_set_rcvlowat,
.read_sock = mptcp_read_sock,
+ .splice_read = mptcp_splice_read,
};
static struct inet_protosw mptcp_protosw = {
@@ -4149,6 +4284,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
#endif
.set_rcvlowat = mptcp_set_rcvlowat,
.read_sock = mptcp_read_sock,
+ .splice_read = mptcp_splice_read,
};
static struct proto mptcp_v6_prot;
--
2.48.1
On 7/7/25 11:34 AM, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch implements .splice_read interface of mptcp struct proto_ops > as mptcp_splice_read() with reference to tcp_splice_read(). > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > --- > net/mptcp/protocol.c | 136 +++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 136 insertions(+) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index fc429d175ede..4638d4be2b98 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -4023,6 +4023,140 @@ static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc, > return copied; > } > > +/* > + * MPTCP splice context > + */ > +struct mptcp_splice_state { > + struct pipe_inode_info *pipe; > + size_t len; > + unsigned int flags; > +}; > + > +static int mptcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, > + unsigned int offset, size_t len) > +{ > + struct mptcp_splice_state *mss = rd_desc->arg.data; > + int ret; > + > + ret = skb_splice_bits(skb, skb->sk, offset, mss->pipe, > + min(rd_desc->count, len), mss->flags); > + if (ret > 0) > + rd_desc->count -= ret; > + return ret; > +} I have mixed feelings WRT the above. I'm wondering if we should reuse the same code already existing in TCP, moving the tcp_splice_state definition into some shared header and making tcp_splice_data_recv non-static. > +static int __mptcp_splice_read(struct sock *sk, struct mptcp_splice_state *mss) > +{ > + /* Store MPTCP splice context information in read_descriptor_t. 
*/ > + read_descriptor_t rd_desc = { > + .arg.data = mss, > + .count = mss->len, > + }; > + > + return mptcp_read_sock(sk, &rd_desc, mptcp_splice_data_recv); > +} > + > +/** > + * mptcp_splice_read - splice data from MPTCP socket to a pipe > + * @sock: socket to splice from > + * @ppos: position (not valid) > + * @pipe: pipe to splice to > + * @len: number of bytes to splice > + * @flags: splice modifier flags > + * > + * Description: > + * Will read pages from given socket and fill them into a pipe. > + * > + **/ > +static ssize_t mptcp_splice_read(struct socket *sock, loff_t *ppos, > + struct pipe_inode_info *pipe, size_t len, > + unsigned int flags) > +{ > + struct mptcp_splice_state mss = { > + .pipe = pipe, > + .len = len, > + .flags = flags, > + }; > + struct sock *sk = sock->sk; > + ssize_t spliced; > + long timeo; > + int ret; > + > + /* > + * We can't seek on a socket input > + */ > + if (unlikely(*ppos)) > + return -ESPIPE; > + > + spliced = 0; > + ret = 0; > + > + lock_sock(sk); > + > + timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK); > + while (mss.len) { > + ret = __mptcp_splice_read(sk, &mss); > + if (ret < 0) { > + break; > + } else if (!ret) { > + if (spliced) > + break; > + if (sock_flag(sk, SOCK_DONE)) > + break; > + if (sk->sk_err) { > + ret = sock_error(sk); > + break; > + } > + if (sk->sk_shutdown & RCV_SHUTDOWN) { > + if (__mptcp_move_skbs(sk)) > + continue; > + break; > + } > + if (sk->sk_state == TCP_CLOSE) { > + ret = -ENOTCONN; > + break; > + } > + if (!timeo) { > + ret = -EAGAIN; > + break; > + } > + /* if __mptcp_splice_read() got nothing while we have > + * an skb in receive queue, we do not want to loop. > + * This might happen with URG data. 
> + */ > + if (!skb_queue_empty(&sk->sk_receive_queue)) > + break; > + ret = sk_wait_data(sk, &timeo, NULL); > + if (ret < 0) > + break; > + if (signal_pending(current)) { > + ret = sock_intr_errno(timeo); > + break; > + } I think that moving the above if statement before the queue-empty check will not change the overall behavior. With that in place you could factor out a bool mptcp_recv_should_stop(struct sock *sk, int err) helper from mptcp_recvmsg() and use it verbatim both in mptcp_recvmsg() and here. Side note: suggestions for a better helper name are welcome! /P
Hi Paolo, On Tue, 2025-07-08 at 16:52 +0200, Paolo Abeni wrote: > On 7/7/25 11:34 AM, Geliang Tang wrote: > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > This patch implements .splice_read interface of mptcp struct > > proto_ops > > as mptcp_splice_read() with reference to tcp_splice_read(). > > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > > --- > > net/mptcp/protocol.c | 136 > > +++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 136 insertions(+) > > > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > > index fc429d175ede..4638d4be2b98 100644 > > --- a/net/mptcp/protocol.c > > +++ b/net/mptcp/protocol.c > > @@ -4023,6 +4023,140 @@ static int mptcp_read_sock(struct sock *sk, > > read_descriptor_t *desc, > > return copied; > > } > > > > +/* > > + * MPTCP splice context > > + */ > > +struct mptcp_splice_state { > > + struct pipe_inode_info *pipe; > > + size_t len; > > + unsigned int flags; > > +}; > > + > > +static int mptcp_splice_data_recv(read_descriptor_t *rd_desc, > > struct sk_buff *skb, > > + unsigned int offset, size_t len) > > +{ > > + struct mptcp_splice_state *mss = rd_desc->arg.data; > > + int ret; > > + > > + ret = skb_splice_bits(skb, skb->sk, offset, mss->pipe, > > + min(rd_desc->count, len), mss- > > >flags); > > + if (ret > 0) > > + rd_desc->count -= ret; > > + return ret; > > +} > > I have mixed feeling WRT the above. I'm wondering if we should reuse > the > same code already existing in TCP, moving tcp_spice_state definition > in > some shared hdr and macking tcp_splice_data_recv not static. > > > +static int __mptcp_splice_read(struct sock *sk, struct > > mptcp_splice_state *mss) > > +{ > > + /* Store MPTCP splice context information in > > read_descriptor_t. 
*/ > > + read_descriptor_t rd_desc = { > > + .arg.data = mss, > > + .count = mss->len, > > + }; > > + > > + return mptcp_read_sock(sk, &rd_desc, > > mptcp_splice_data_recv); > > +} > > + > > +/** > > + * mptcp_splice_read - splice data from MPTCP socket to a pipe > > + * @sock: socket to splice from > > + * @ppos: position (not valid) > > + * @pipe: pipe to splice to > > + * @len: number of bytes to splice > > + * @flags: splice modifier flags > > + * > > + * Description: > > + * Will read pages from given socket and fill them into a pipe. > > + * > > + **/ > > +static ssize_t mptcp_splice_read(struct socket *sock, loff_t > > *ppos, > > + struct pipe_inode_info *pipe, > > size_t len, > > + unsigned int flags) > > +{ > > + struct mptcp_splice_state mss = { > > + .pipe = pipe, > > + .len = len, > > + .flags = flags, > > + }; > > + struct sock *sk = sock->sk; > > + ssize_t spliced; > > + long timeo; > > + int ret; > > + > > + /* > > + * We can't seek on a socket input > > + */ > > + if (unlikely(*ppos)) > > + return -ESPIPE; > > + > > + spliced = 0; > > + ret = 0; > > + > > + lock_sock(sk); > > + > > + timeo = sock_rcvtimeo(sk, sock->file->f_flags & > > O_NONBLOCK); > > + while (mss.len) { > > + ret = __mptcp_splice_read(sk, &mss); > > + if (ret < 0) { > > + break; > > + } else if (!ret) { > > + if (spliced) > > + break; > > + if (sock_flag(sk, SOCK_DONE)) > > + break; I noticed that this SOCK_DONE flag is also checked in tcp_recvmsg_locked() but not in mptcp_recvmsg(). I wonder if this flag should also be checked in mptcp_recvmsg() too. 
> > + if (sk->sk_err) { > > + ret = sock_error(sk); > > + break; > > + } > > + if (sk->sk_shutdown & RCV_SHUTDOWN) { > > + if (__mptcp_move_skbs(sk)) > > + continue; > > + break; > > + } > > + if (sk->sk_state == TCP_CLOSE) { > > + ret = -ENOTCONN; > > + break; > > + } > > + if (!timeo) { > > + ret = -EAGAIN; > > + break; > > + } > > + /* if __mptcp_splice_read() got nothing > > while we have > > + * an skb in receive queue, we do not want > > to loop. > > + * This might happen with URG data. > > + */ > > + if (!skb_queue_empty(&sk- > > >sk_receive_queue)) > > + break; > > + ret = sk_wait_data(sk, &timeo, NULL); > > + if (ret < 0) > > + break; > > + if (signal_pending(current)) { > > + ret = sock_intr_errno(timeo); > > + break; > > + } > > I think that moving the above if statement before the queue empty > check > will not change the overall behavior. > > With that in place you could factor out an > > bool mptcp_recv_should_stop(struct sock *sk, int err) This helper could also be used in tcp_recvmsg_locked() and tcp_splice_read(). What about renaming it to tcp_recv_should_stop(), adding it to include/net/tcp.h, and using it for both TCP and MPTCP? WDYT? Thanks, -Geliang > > helper from mptcp_recvmsg() and use it verbatim in both in > mptcp_recvmsg() and here. > > Side note: suggestions for a better helper name welcome! > > /P >
© 2016 - 2025 Red Hat, Inc.