[PATCH 2/2] sunrpc: add a slot to rqstp->rq_bvec for TCP record marker
Posted by Jeff Layton 2 months, 1 week ago
svc_tcp_sendmsg steals a slot in the rq_bvec array for the TCP record
marker. If the send is an unaligned READ call though, then there may not
be enough slots in the rq_bvec array.

Add a slot to the rq_bvec array, and fix up the array length
calculations.

Fixes: e18e157bb5c8 ("SUNRPC: Send RPC message on TCP with a single sock_sendmsg() call")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/vfs.c        | 6 +++---
 net/sunrpc/svc.c     | 3 ++-
 net/sunrpc/svcsock.c | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 77f6879c2e063fa79865100bbc2d1e64eb332f42..c4e9300d657cf7fdba23f2f4e4bdaad9cd99d1a3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1111,7 +1111,7 @@ nfsd_direct_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
 	v = 0;
 	total = dio_end - dio_start;
-	while (total && v < rqstp->rq_maxpages &&
+	while (total && v < rqstp->rq_maxpages + 1 &&
 	       rqstp->rq_next_page < rqstp->rq_page_end) {
 		len = min_t(size_t, total, PAGE_SIZE);
 		bvec_set_page(&rqstp->rq_bvec[v], *rqstp->rq_next_page,
@@ -1200,7 +1200,7 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
 	v = 0;
 	total = *count;
-	while (total && v < rqstp->rq_maxpages &&
+	while (total && v < rqstp->rq_maxpages + 1 &&
 	       rqstp->rq_next_page < rqstp->rq_page_end) {
 		len = min_t(size_t, total, PAGE_SIZE - base);
 		bvec_set_page(&rqstp->rq_bvec[v], *rqstp->rq_next_page,
@@ -1318,7 +1318,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (stable && !fhp->fh_use_wgather)
 		kiocb.ki_flags |= IOCB_DSYNC;
 
-	nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload);
+	nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages + 1, payload);
 	iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt);
 	since = READ_ONCE(file->f_wb_err);
 	if (verf)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4704dce7284eccc9e2bc64cf22947666facfa86a..919263a0c04e3f1afa607414bc1893ba02206e38 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -706,7 +706,8 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	if (!svc_init_buffer(rqstp, serv, node))
 		goto out_enomem;
 
-	rqstp->rq_bvec = kcalloc_node(rqstp->rq_maxpages,
+	/* +1 for the TCP record marker */
+	rqstp->rq_bvec = kcalloc_node(rqstp->rq_maxpages + 1,
 				      sizeof(struct bio_vec),
 				      GFP_KERNEL, node);
 	if (!rqstp->rq_bvec)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 377fcaaaa061463fc5c85fc09c7a8eab5e06af77..5f8bb11b686bcd7302b94476490ba9b1b9ddc06a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -740,7 +740,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
 	if (svc_xprt_is_dead(xprt))
 		goto out_notconn;
 
-	count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr);
+	count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages + 1, xdr);
 
 	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
 		      count, rqstp->rq_res.len);
@@ -1244,7 +1244,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
 	memcpy(buf, &marker, sizeof(marker));
 	bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
 
-	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages - 1,
+	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages,
 				&rqstp->rq_res);
 
 	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,

-- 
2.51.0
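
For readers outside the kernel tree, the arithmetic behind the fix can be
sketched in a few lines of plain C. The RQ_MAXPAGES value and the READ
geometry below are made-up illustration values, and slots_needed() is a
hypothetical helper rather than the kernel's real page accounting; the point
is only that once slot 0 is reserved for the record marker, an unaligned
maximum-sized READ needs rq_maxpages + 1 bio_vec entries.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE   4096UL
#define RQ_MAXPAGES 259		/* hypothetical rq_maxpages value */

/* slot 0 carries the 4-byte record marker, the rest carry payload pages */
static size_t slots_needed(size_t payload_len, size_t page_offset)
{
	size_t payload_slots = (page_offset + payload_len + PAGE_SIZE - 1) / PAGE_SIZE;

	return 1 + payload_slots;	/* +1 for the record marker */
}

int main(void)
{
	size_t len = (RQ_MAXPAGES - 1) * PAGE_SIZE;	/* a maximum-sized READ */

	/* page-aligned READ: marker + payload still fit in RQ_MAXPAGES slots */
	printf("aligned:   %zu slots needed, %d available\n",
	       slots_needed(len, 0), RQ_MAXPAGES);

	/* unaligned READ: payload spills into one extra page and overflows */
	printf("unaligned: %zu slots needed, %d available (old), %d (patched)\n",
	       slots_needed(len, 512), RQ_MAXPAGES, RQ_MAXPAGES + 1);
	return 0;
}

With these made-up numbers the aligned case needs 259 slots and the unaligned
case 260, which is exactly the off-by-one the patch adds back.
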
Re: [PATCH 2/2] sunrpc: add a slot to rqstp->rq_bvec for TCP record marker
Posted by Jeff Layton 2 months, 1 week ago
On Wed, 2025-10-08 at 14:58 -0400, Jeff Layton wrote:
> svc_tcp_sendmsg steals a slot in the rq_bvec array for the TCP record
> marker. If the send is an unaligned READ call though, then there may not
> be enough slots in the rq_bvec array.
> 
> Add a slot to the rq_bvec array, and fix up the array length
> calculations.
> 
> Fixes: e18e157bb5c8 ("SUNRPC: Send RPC message on TCP with a single sock_sendmsg() call")
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Mike suggested that we add more info to the changelog that will
actually get committed to the kernel. How about this?

-----------------------------8<---------------------------
    sunrpc: add a slot to rqstp->rq_bvec for TCP record marker
    
    We've seen some occurrences of messages like this in dmesg on some knfsd
    servers:
    
        xdr_buf_to_bvec: bio_vec array overflow
    
    Usually followed by messages like this that indicate a short send:
    
        rpc-srv/tcp: nfsd: sent 1048155 when sending 1048152 bytes - shutting down socket
    
    svc_tcp_sendmsg() steals a slot in the rq_bvec array for the TCP record
    marker. If the send is for an unaligned READ call, though, there may not
    be enough slots left in the rq_bvec array.
    
    Add a slot to the rq_bvec array, and fix up the array lengths in the
    callers that care.
    
    Fixes: e18e157bb5c8 ("SUNRPC: Send RPC message on TCP with a single sock_sendmsg() call")
    Tested-by: Brandon Adams <brandona@meta.com>
    Signed-off-by: Jeff Layton <jlayton@kernel.org>
-----------------------------8<---------------------------
-- 
Jeff Layton <jlayton@kernel.org>
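
To connect the two dmesg lines in the proposed changelog, here is a rough
sketch of how the overflow turns into a failed send. fill_bvecs() is only a
stand-in for xdr_buf_to_bvec(), assumed here to warn and stop filling once
the array is full; the final comparison mimics the sent-vs-expected check
that shuts the socket down, and the made-up chunk sizes bear no relation to
the byte counts in the real report.

#include <stdio.h>
#include <stddef.h>

struct fake_bvec { size_t len; };

/*
 * Hypothetical stand-in for xdr_buf_to_bvec(): copy chunk lengths into
 * the bio_vec array, but warn and stop once the array runs out of slots.
 */
static size_t fill_bvecs(struct fake_bvec *bv, size_t nslots,
			 const size_t *chunks, size_t nchunks)
{
	size_t i, n = 0;

	for (i = 0; i < nchunks; i++) {
		if (n == nslots) {
			fprintf(stderr, "fill_bvecs: bio_vec array overflow\n");
			break;
		}
		bv[n++].len = chunks[i];
	}
	return n;
}

int main(void)
{
	/* record marker + three payload chunks, but only three slots */
	const size_t chunks[] = { 4, 4096, 4096, 512 };
	struct fake_bvec bv[3];
	size_t want = 4 + 4096 + 4096 + 512;
	size_t got = 0, n, i;

	n = fill_bvecs(bv, 3, chunks, 4);
	for (i = 0; i < n; i++)
		got += bv[i].len;

	/* mimics the sent-vs-expected check that tears down the socket */
	if (got != want)
		printf("sent %zu when sending %zu bytes - shutting down socket\n",
		       got, want);
	return 0;
}

Running it prints the overflow warning followed by the byte-count mismatch,
which matches the "overflow, then short send" ordering described in the
proposed changelog.
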
Re: [PATCH 2/2] sunrpc: add a slot to rqstp->rq_bvec for TCP record marker
Posted by Jeff Layton 2 months, 1 week ago
On Wed, 2025-10-08 at 14:58 -0400, Jeff Layton wrote:
> svc_tcp_sendmsg steals a slot in the rq_bvec array for the TCP record
> marker. If the send is an unaligned READ call though, then there may not
> be enough slots in the rq_bvec array.
> 
> Add a slot to the rq_bvec array, and fix up the array length
> calculations.
> 
> Fixes: e18e157bb5c8 ("SUNRPC: Send RPC message on TCP with a single sock_sendmsg() call")
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---

Chuck,

Brandon (cc'ed) was able to test a backported version of this patch and
validate that it worked. Can you add this?

Tested-by: Brandon Adams <brandona@meta.com>


-- 
Jeff Layton <jlayton@kernel.org>