Allows to query in-sequence data ready for read(), total bytes in
write queue and total bytes in write queue that have not yet been sent.
v2: remove unneeded READ_ONCE() (Paolo Abeni)
Signed-off-by: Florian Westphal <fw@strlen.de>
---
net/mptcp/protocol.c | 54 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 61047d04e849..1c4604283b00 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -22,6 +22,7 @@
#endif
#include <net/mptcp.h>
#include <net/xfrm.h>
+#include <asm/ioctls.h>
#include "protocol.h"
#include "mib.h"
@@ -3260,6 +3261,58 @@ static int mptcp_forward_alloc_get(const struct sock *sk)
return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
}
+static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
+{
+ const struct sock *sk = (void *)msk;
+ u64 delta;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return -EINVAL;
+
+ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+ return 0;
+
+ delta = msk->write_seq - v;
+ if (delta > INT_MAX)
+ delta = INT_MAX;
+
+ return (int)delta;
+}
+
+static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int answ;
+ bool slow;
+
+ switch (cmd) {
+ case SIOCINQ:
+ if (sk->sk_state == TCP_LISTEN)
+ return -EINVAL;
+
+ lock_sock(sk);
+ if (skb_queue_empty(&msk->receive_queue))
+ __mptcp_move_skbs(msk);
+ answ = mptcp_inq_hint(sk);
+ release_sock(sk);
+ break;
+ case SIOCOUTQ:
+ slow = lock_sock_fast(sk);
+ answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
+ unlock_sock_fast(sk, slow);
+ break;
+ case SIOCOUTQNSD:
+ slow = lock_sock_fast(sk);
+ answ = mptcp_ioctl_outq(msk, msk->snd_nxt);
+ unlock_sock_fast(sk, slow);
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return put_user(answ, (int __user *)arg);
+}
+
static struct proto mptcp_prot = {
.name = "MPTCP",
.owner = THIS_MODULE,
@@ -3272,6 +3325,7 @@ static struct proto mptcp_prot = {
.shutdown = mptcp_shutdown,
.destroy = mptcp_destroy,
.sendmsg = mptcp_sendmsg,
+ .ioctl = mptcp_ioctl,
.recvmsg = mptcp_recvmsg,
.release_cb = mptcp_release_cb,
.hash = mptcp_hash,
--
2.32.0
On Thu, 11 Nov 2021, Florian Westphal wrote:
> Allows to query in-sequence data ready for read(), total bytes in
> write queue and total bytes in write queue that have not yet been sent.
>
> v2: remove unneeded READ_ONCE() (Paolo Abeni)
>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
> net/mptcp/protocol.c | 54 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 54 insertions(+)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 61047d04e849..1c4604283b00 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -22,6 +22,7 @@
> #endif
> #include <net/mptcp.h>
> #include <net/xfrm.h>
> +#include <asm/ioctls.h>
> #include "protocol.h"
> #include "mib.h"
>
> @@ -3260,6 +3261,58 @@ static int mptcp_forward_alloc_get(const struct sock *sk)
> return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
> }
>
> +static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
> +{
> + const struct sock *sk = (void *)msk;
> + u64 delta;
> +
> + if (sk->sk_state == TCP_LISTEN)
> + return -EINVAL;
> +
> + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
> + return 0;
> +
> + delta = msk->write_seq - v;
> + if (delta > INT_MAX)
> + delta = INT_MAX;
> +
> + return (int)delta;
> +}
> +
> +static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> + int answ;
> + bool slow;
Reverse-xmas issue here.
> +
> + switch (cmd) {
> + case SIOCINQ:
> + if (sk->sk_state == TCP_LISTEN)
> + return -EINVAL;
> +
> + lock_sock(sk);
> + if (skb_queue_empty(&msk->receive_queue))
I'm guessing that the idea here is that answering with a nonzero value is
enough to get the userspace program to do another read, rather than always
imposing overhead to dig through the subflow rx buffers and
sk->sk_receive_queue? That does seem like a good way to go, at least with
regard to calling __mptcp_move_skbs().
Would it be useful to check sk->sk_receive_queue and splice if non-empty
(which is relatively cheap) to get a slightly more accurate inq number?
To maybe answer this question for myself - the socket lock was just
acquired so it's unlikely for skbs to have been added to sk_receive_queue,
and the extra complexity may not be worth it.
- Mat
> + __mptcp_move_skbs(msk);
> + answ = mptcp_inq_hint(sk);
> + release_sock(sk);
> + break;
> + case SIOCOUTQ:
> + slow = lock_sock_fast(sk);
> + answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
> + unlock_sock_fast(sk, slow);
> + break;
> + case SIOCOUTQNSD:
> + slow = lock_sock_fast(sk);
> + answ = mptcp_ioctl_outq(msk, msk->snd_nxt);
> + unlock_sock_fast(sk, slow);
> + break;
> + default:
> + return -ENOIOCTLCMD;
> + }
> +
> + return put_user(answ, (int __user *)arg);
> +}
> +
> static struct proto mptcp_prot = {
> .name = "MPTCP",
> .owner = THIS_MODULE,
> @@ -3272,6 +3325,7 @@ static struct proto mptcp_prot = {
> .shutdown = mptcp_shutdown,
> .destroy = mptcp_destroy,
> .sendmsg = mptcp_sendmsg,
> + .ioctl = mptcp_ioctl,
> .recvmsg = mptcp_recvmsg,
> .release_cb = mptcp_release_cb,
> .hash = mptcp_hash,
> --
> 2.32.0
>
>
>
--
Mat Martineau
Intel
Mat Martineau <mathew.j.martineau@linux.intel.com> wrote: > > + lock_sock(sk); > > + if (skb_queue_empty(&msk->receive_queue)) > > I'm guessing that the idea here is that answering with a nonzero value is > enough to get the userspace program to do another read, rather than always > imposing overhead to dig through the subflow rx buffers and > sk->sk_receive_queue? That does seem like a good way to go, at least with > regard to calling __mptcp_move_skbs(). > > Would it be useful to check sk->sk_receive_queue and splice if non-empty > (which is relatively cheap) to get a slightly more accurate inq number? To > maybe answer this question for myself - the socket lock was just acquired so > it's unlikely for skbs to have been added to sk_receive_queue, and the extra > complexity may not be worth it. The idea was to avoid a bogus '0' answer; msk rx queue might be empty but sk_rx q might have new data. I can make the __mptcp_move_skbs() unconditional, as you said we already acquired the msk socket lock so might as well splice pending subflows and so on.
© 2016 - 2026 Red Hat, Inc.