svc_tcp_sendmsg() calls xdr_buf_to_bvec() with the second slot of
rq_bvec as the start, but doesn't reduce the array length by one, which
could lead to an array overrun. Also, rq_bvec is always rq_maxpages in
length, which can be too short in some cases, since the TCP record
marker consumes a slot.
Fix both problems by adding a separate bvec array to the svc_sock that
is specifically for sending. Allocate it when doing the first send on
the socket, to avoid allocating the array for listener sockets.
For TCP, make this array one slot longer than rq_maxpages, to account
for the record marker. For UDP, only allocate as large an array as we
need, since frames are limited to 64k anyway.
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
Minor update to the last patch to reduce the size of the array on UDP
sockets since that transport doesn't need rq_maxpages.
---
Changes in v5:
- reduce the size of sk_bvec on UDP sockets
- Link to v4: https://lore.kernel.org/r/20251010-rq_bvec-v4-1-627567f1ce91@kernel.org
Changes in v4:
- switch to allocating a separate bvec for sends in the svc_sock
- Link to v3: https://lore.kernel.org/r/20251009-rq_bvec-v3-0-57181360b9cb@kernel.org
Changes in v3:
- Add rq_bvec_len field and use it in appropriate places
- Link to v2: https://lore.kernel.org/r/20251008-rq_bvec-v2-0-823c0a85a27c@kernel.org
Changes in v2:
- Better changelog message for patch #2
- Link to v1: https://lore.kernel.org/r/20251008-rq_bvec-v1-0-7f23d32d75e5@kernel.org
---
include/linux/sunrpc/svcsock.h | 3 +++
net/sunrpc/svcsock.c | 29 ++++++++++++++++++++++-------
2 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 963bbe251e52109a902f6b9097b6e9c3c23b1fd8..a80a05aba75410b3c4cd7ba19181ead7d40e1fdf 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,6 +26,9 @@ struct svc_sock {
void (*sk_odata)(struct sock *);
void (*sk_owspace)(struct sock *);
+ /* For sends */
+ struct bio_vec *sk_bvec;
+
/* private TCP part */
/* On-the-wire fragment header: */
__be32 sk_marker;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7b90abc5cf0ee1520796b2f38fcb977417009830..0ec1131ffade8d0c66099bfb1fb141b22c6e411b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -730,6 +730,13 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
unsigned int count;
int err;
+ count = DIV_ROUND_UP(RPC_MAX_REPHEADER_WITH_AUTH + RPCSVC_MAXPAYLOAD_UDP, PAGE_SIZE);
+ if (!svsk->sk_bvec) {
+ svsk->sk_bvec = kcalloc(count, sizeof(*svsk->sk_bvec), GFP_KERNEL);
+ if (!svsk->sk_bvec)
+ return -ENOMEM;
+ }
+
svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
rqstp->rq_xprt_ctxt = NULL;
@@ -740,14 +747,14 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr);
+ count = xdr_buf_to_bvec(svsk->sk_bvec, rqstp->rq_maxpages, xdr);
- iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
- iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
}
@@ -1235,19 +1242,19 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
int ret;
/* The stream record marker is copied into a temporary page
- * fragment buffer so that it can be included in rq_bvec.
+ * fragment buffer so that it can be included in sk_bvec.
*/
buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
GFP_KERNEL);
if (!buf)
return -ENOMEM;
memcpy(buf, &marker, sizeof(marker));
- bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
+ bvec_set_virt(svsk->sk_bvec, buf, sizeof(marker));
- count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages,
+ count = xdr_buf_to_bvec(svsk->sk_bvec + 1, rqstp->rq_maxpages,
&rqstp->rq_res);
- iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
1 + count, sizeof(marker) + rqstp->rq_res.len);
ret = sock_sendmsg(svsk->sk_sock, &msg);
page_frag_free(buf);
@@ -1272,6 +1279,13 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
(u32)xdr->len);
int sent;
+ if (!svsk->sk_bvec) {
+ /* +1 for TCP record marker */
+ svsk->sk_bvec = kcalloc(rqstp->rq_maxpages + 1, sizeof(*svsk->sk_bvec), GFP_KERNEL);
+ if (!svsk->sk_bvec)
+ return -ENOMEM;
+ }
+
svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
rqstp->rq_xprt_ctxt = NULL;
@@ -1636,5 +1650,6 @@ static void svc_sock_free(struct svc_xprt *xprt)
sock_release(sock);
page_frag_cache_drain(&svsk->sk_frag_cache);
+ kfree(svsk->sk_bvec);
kfree(svsk);
}
---
base-commit: 177818f176ef904fb18d237d1dbba00c2643aaf2
change-id: 20251008-rq_bvec-b66afd0fdbbb
Best regards,
--
Jeff Layton <jlayton@kernel.org>
On Sat, 11 Oct 2025, Jeff Layton wrote:
> svc_tcp_sendmsg() calls xdr_buf_to_bvec() with the second slot of
> rq_bvec as the start, but doesn't reduce the array length by one, which
> could lead to an array overrun. Also, rq_bvec is always rq_maxpages in
> length, which can be too short in some cases, since the TCP record
> marker consumes a slot.
>
> Fix both problems by adding a separate bvec array to the svc_sock that
> is specifically for sending. Allocate it when doing the first send on
> the socket, to avoid allocating the array for listener sockets.
>
> For TCP, make this array one slot longer than rq_maxpages, to account
> for the record marker. For UDP, only allocate as large an array as we
> need, since frames are limited to 64k anyway.
I think I like this approach better - thanks.
It is certainly more focussed and makes it clearer where the problem
being fixed lies.
However I would prefer the allocation happen in ->xpo_create.
For UDP, always allocate the bvec.
For TCP, only allocate it if SVC_SOCK_TEMPORARY is set; something like
the sketch below.
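A minimal sketch of that arrangement, called from socket setup
(svc_sock_alloc_bvec() and svc_udp_max_bvecs() are hypothetical
helpers; the exact hook point and sizing are assumptions, not part of
the patch as posted):

static int svc_sock_alloc_bvec(struct svc_sock *svsk, struct svc_serv *serv,
			       int protocol, int flags)
{
	unsigned long nr_bvecs;

	if (protocol == IPPROTO_UDP)
		nr_bvecs = svc_udp_max_bvecs();	/* sized as in 1/ below */
	else if (flags & SVC_SOCK_TEMPORARY)
		/* accepted TCP socket: one extra slot for the record marker */
		nr_bvecs = svc_serv_maxpages(serv) + 1;
	else
		return 0;	/* TCP listener: never sends */

	svsk->sk_bvec = kcalloc(nr_bvecs, sizeof(*svsk->sk_bvec), GFP_KERNEL);
	return svsk->sk_bvec ? 0 : -ENOMEM;
}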
>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
> Minor update to the last patch to reduce the size of the array on UDP
> sockets since that transport doesn't need rq_maxpages.
> ---
> Changes in v5:
> - reduce the size of sk_bvec on UDP sockets
> - Link to v4: https://lore.kernel.org/r/20251010-rq_bvec-v4-1-627567f1ce91@kernel.org
>
> Changes in v4:
> - switch to allocating a separate bvec for sends in the svc_sock
> - Link to v3: https://lore.kernel.org/r/20251009-rq_bvec-v3-0-57181360b9cb@kernel.org
>
> Changes in v3:
> - Add rq_bvec_len field and use it in appropriate places
> - Link to v2: https://lore.kernel.org/r/20251008-rq_bvec-v2-0-823c0a85a27c@kernel.org
>
> Changes in v2:
> - Better changelog message for patch #2
> - Link to v1: https://lore.kernel.org/r/20251008-rq_bvec-v1-0-7f23d32d75e5@kernel.org
> ---
> include/linux/sunrpc/svcsock.h | 3 +++
> net/sunrpc/svcsock.c | 29 ++++++++++++++++++++++-------
> 2 files changed, 25 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index 963bbe251e52109a902f6b9097b6e9c3c23b1fd8..a80a05aba75410b3c4cd7ba19181ead7d40e1fdf 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -26,6 +26,9 @@ struct svc_sock {
> void (*sk_odata)(struct sock *);
> void (*sk_owspace)(struct sock *);
>
> + /* For sends */
> + struct bio_vec *sk_bvec;
Protected by xpt_mutex. You probably don't need to state that, but I
wanted to check it was protected from concurrent access.
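For context, a condensed view of why xpt_mutex covers this (my
paraphrase of the common svc_udp_sendto()/svc_tcp_sendto() structure;
error handling elided):

	mutex_lock(&xprt->xpt_mutex);
	if (svc_xprt_is_dead(xprt))
		goto out_notconn;
	/* ... fill sk_bvec and transmit ... */
	err = sock_sendmsg(svsk->sk_sock, &msg);
	mutex_unlock(&xprt->xpt_mutex);

Both send paths fill and transmit the bvec under the per-transport
mutex, so a single sk_bvec per socket is safe.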
> +
> /* private TCP part */
> /* On-the-wire fragment header: */
> __be32 sk_marker;
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 7b90abc5cf0ee1520796b2f38fcb977417009830..0ec1131ffade8d0c66099bfb1fb141b22c6e411b 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -730,6 +730,13 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
> unsigned int count;
> int err;
>
> + count = DIV_ROUND_UP(RPC_MAX_REPHEADER_WITH_AUTH + RPCSVC_MAXPAYLOAD_UDP, PAGE_SIZE);
> + if (!svsk->sk_bvec) {
> + svsk->sk_bvec = kcalloc(count, sizeof(*svsk->sk_bvec), GFP_KERNEL);
> + if (!svsk->sk_bvec)
> + return -ENOMEM;
> + }
1/ I don't think that calculation is correct. There is no expectation
that the entire reply is contiguous in pages.
There is likely to be:
- a header: 1 page
- a payload, which if not aligned could be 64K of pages, plus a start
and an end page
- a tail, with padding to round the payload up to 4 bytes.
I think you need a calculation similar to svc_serv_maxpages(); see the
sketch after this list.
2/ Please don't re-use the "count" variable. Here it is an array size,
below it is array usage. The compiler can use the same register for
both variables if that works out.
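A hedged sketch of a sizing along those lines (svc_udp_max_bvecs() is
a hypothetical helper; the breakdown follows the list above and is my
assumption, not a quote of svc_serv_maxpages()):

static inline unsigned long svc_udp_max_bvecs(void)
{
	/* reply header, plus a full 64k payload, plus 2 extra pages for
	 * an unaligned payload start/end, plus 1 page for the padded tail
	 */
	return DIV_ROUND_UP(RPC_MAX_REPHEADER_WITH_AUTH, PAGE_SIZE) +
	       DIV_ROUND_UP(RPCSVC_MAXPAYLOAD_UDP, PAGE_SIZE) + 2 + 1;
}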
> +
> svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> rqstp->rq_xprt_ctxt = NULL;
>
> @@ -740,14 +747,14 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
> if (svc_xprt_is_dead(xprt))
> goto out_notconn;
>
> - count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr);
> + count = xdr_buf_to_bvec(svsk->sk_bvec, rqstp->rq_maxpages, xdr);
^^^^^^^^^^^^^^^^^^
That should be "count". That would make it clear that "count" was being
overloaded in a possibly confusing way.
Thanks,
NeilBrown
>
> - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
> count, rqstp->rq_res.len);
> err = sock_sendmsg(svsk->sk_sock, &msg);
> if (err == -ECONNREFUSED) {
> /* ICMP error on earlier request. */
> - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
> count, rqstp->rq_res.len);
> err = sock_sendmsg(svsk->sk_sock, &msg);
> }
> @@ -1235,19 +1242,19 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
> int ret;
>
> /* The stream record marker is copied into a temporary page
> - * fragment buffer so that it can be included in rq_bvec.
> + * fragment buffer so that it can be included in sk_bvec.
> */
> buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
> GFP_KERNEL);
> if (!buf)
> return -ENOMEM;
> memcpy(buf, &marker, sizeof(marker));
> - bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
> + bvec_set_virt(svsk->sk_bvec, buf, sizeof(marker));
>
> - count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages,
> + count = xdr_buf_to_bvec(svsk->sk_bvec + 1, rqstp->rq_maxpages,
> &rqstp->rq_res);
>
> - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
> 1 + count, sizeof(marker) + rqstp->rq_res.len);
> ret = sock_sendmsg(svsk->sk_sock, &msg);
> page_frag_free(buf);
> @@ -1272,6 +1279,13 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
> (u32)xdr->len);
> int sent;
>
> + if (!svsk->sk_bvec) {
> + /* +1 for TCP record marker */
> + svsk->sk_bvec = kcalloc(rqstp->rq_maxpages + 1, sizeof(*svsk->sk_bvec), GFP_KERNEL);
> + if (!svsk->sk_bvec)
> + return -ENOMEM;
> + }
> +
> svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
> rqstp->rq_xprt_ctxt = NULL;
>
> @@ -1636,5 +1650,6 @@ static void svc_sock_free(struct svc_xprt *xprt)
> sock_release(sock);
>
> page_frag_cache_drain(&svsk->sk_frag_cache);
> + kfree(svsk->sk_bvec);
> kfree(svsk);
> }
>
> ---
> base-commit: 177818f176ef904fb18d237d1dbba00c2643aaf2
> change-id: 20251008-rq_bvec-b66afd0fdbbb
>
> Best regards,
> --
> Jeff Layton <jlayton@kernel.org>
>
>