fs/nfs/nfs4proc.c | 37 +++++++++++++++++++++++++------------
fs/nfs/pnfs.c | 21 +++++++++++++++++++--
2 files changed, 44 insertions(+), 14 deletions(-)
pnfs_layoutreturn_before_put_layout_hdr() bumps the layout header refcount
and sets NFS_LAYOUT_RETURN before the layout driver's prepare_layoutreturn()
callback runs and before rpc_run_task() dispatches the call. If
prepare_layoutreturn() fails, or rpc_run_task() fails to queue the call, we
currently leak references and leave waiters stuck in
pnfs_prepare_to_retry_layoutget().
Mirror the normal completion path for these early failures: log a warning,
call pnfs_layoutreturn_retry_later(), free any reserved slot, drop the
layout header, credential and inode references, and clear the wait bit.
Signed-off-by: Robert Milkowski <rmilkowski@gmail.com>
---
fs/nfs/nfs4proc.c | 37 +++++++++++++++++++++++++------------
fs/nfs/pnfs.c | 21 +++++++++++++++++++--
2 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 93c6ce04332b..6066a1c7227d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10132,25 +10132,34 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
rpc_restart_call_prepare(task);
}
-static void nfs4_layoutreturn_release(void *calldata)
+static void nfs4_layoutreturn_cleanup(struct nfs4_layoutreturn *lrp, int status)
{
- struct nfs4_layoutreturn *lrp = calldata;
struct pnfs_layout_hdr *lo = lrp->args.layout;
- if (lrp->rpc_status == 0 || !lrp->inode)
- pnfs_layoutreturn_free_lsegs(
- lo, &lrp->args.stateid, &lrp->args.range,
- lrp->res.lrs_present ? &lrp->res.stateid : NULL);
+ if (status == 0 || !lrp->inode)
+ pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid,
+ &lrp->args.range,
+ lrp->res.lrs_present ?
+ &lrp->res.stateid : NULL);
else
pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid,
&lrp->args.range);
- nfs4_sequence_free_slot(&lrp->res.seq_res);
+ if (lrp->res.seq_res.sr_slot)
+ nfs4_sequence_free_slot(&lrp->res.seq_res);
if (lrp->ld_private.ops && lrp->ld_private.ops->free)
lrp->ld_private.ops->free(&lrp->ld_private);
- pnfs_put_layout_hdr(lrp->args.layout);
- nfs_iput_and_deactive(lrp->inode);
+ pnfs_put_layout_hdr(lo);
+ if (lrp->inode)
+ nfs_iput_and_deactive(lrp->inode);
put_cred(lrp->cred);
- kfree(calldata);
+ kfree(lrp);
+}
+
+static void nfs4_layoutreturn_release(void *calldata)
+{
+ struct nfs4_layoutreturn *lrp = calldata;
+
+ nfs4_layoutreturn_cleanup(lrp, lrp->rpc_status);
}
static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
@@ -10198,8 +10207,12 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags)
nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1,
0);
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return PTR_ERR(task);
+ if (IS_ERR(task)) {
+ status = PTR_ERR(task);
+ trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
+ nfs4_layoutreturn_cleanup(lrp, status);
+ return status;
+ }
if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC))
status = task->tk_status;
trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f157d43d1312..a489f43344b8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1370,13 +1370,30 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
lrp->args.ld_private = &lrp->ld_private;
lrp->clp = NFS_SERVER(ino)->nfs_client;
lrp->cred = cred;
- if (ld->prepare_layoutreturn)
- ld->prepare_layoutreturn(&lrp->args);
+ if (ld->prepare_layoutreturn) {
+ status = ld->prepare_layoutreturn(&lrp->args);
+ if (status) {
+ pr_warn_ratelimited("NFS: pNFS layoutreturn prepare failed (%d) for layout driver %s\n",
+ status, ld->name ? ld->name : "unknown");
+ goto out_prepare_fail;
+ }
+ }
status = nfs4_proc_layoutreturn(lrp, flags);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
+
+out_prepare_fail:
+ pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid, &lrp->args.range);
+ if (lrp->ld_private.ops && lrp->ld_private.ops->free)
+ lrp->ld_private.ops->free(&lrp->ld_private);
+ if (lrp->inode)
+ nfs_iput_and_deactive(lrp->inode);
+ put_cred(cred);
+ kfree(lrp);
+ pnfs_put_layout_hdr(lo);
+ return status;
}
/* Return true if layoutreturn is needed */
base-commit: cb015814f8b6eebcbb8e46e111d108892c5e6821
--
2.47.1
On Tue, 2025-12-09 at 14:53 +0000, Robert Milkowski wrote:
> pnfs_layoutreturn_before_put_layout_hdr() bumps the layout header refcount
> and sets NFS_LAYOUT_RETURN before prepare or rpc_run_task dispatch. If the
> layout driver fails prepare or rpc_run_task() fails to queue the call, we
> currently leak refs and leave waiters stuck on
> pnfs_prepare_to_retry_layoutget().
>
> Mirror the normal completion path for these early failures: warn and
> schedule pnfs_layoutreturn_retry_later(), free any reserved slot, drop
> refs/creds/inode, and clear the wait bit.
>
> Signed-off-by: Robert Milkowski <rmilkowski@gmail.com>
> ---
> fs/nfs/nfs4proc.c | 37 +++++++++++++++++++++++++------------
> fs/nfs/pnfs.c | 21 +++++++++++++++++++--
> 2 files changed, 44 insertions(+), 14 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 93c6ce04332b..6066a1c7227d 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -10132,25 +10132,34 @@ static void nfs4_layoutreturn_done(struct
> rpc_task *task, void *calldata)
> rpc_restart_call_prepare(task);
> }
>
> -static void nfs4_layoutreturn_release(void *calldata)
> +static void nfs4_layoutreturn_cleanup(struct nfs4_layoutreturn *lrp,
> int status)
> {
> - struct nfs4_layoutreturn *lrp = calldata;
> struct pnfs_layout_hdr *lo = lrp->args.layout;
>
> - if (lrp->rpc_status == 0 || !lrp->inode)
> - pnfs_layoutreturn_free_lsegs(
> - lo, &lrp->args.stateid, &lrp->args.range,
> - lrp->res.lrs_present ? &lrp->res.stateid :
> NULL);
> + if (status == 0 || !lrp->inode)
> + pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid,
> + &lrp->args.range,
> + lrp->res.lrs_present ?
> + &lrp->res.stateid :
> NULL);
> else
> pnfs_layoutreturn_retry_later(lo, &lrp-
> >args.stateid,
> &lrp->args.range);
> - nfs4_sequence_free_slot(&lrp->res.seq_res);
> + if (lrp->res.seq_res.sr_slot)
> + nfs4_sequence_free_slot(&lrp->res.seq_res);
> if (lrp->ld_private.ops && lrp->ld_private.ops->free)
> lrp->ld_private.ops->free(&lrp->ld_private);
> - pnfs_put_layout_hdr(lrp->args.layout);
> - nfs_iput_and_deactive(lrp->inode);
> + pnfs_put_layout_hdr(lo);
> + if (lrp->inode)
> + nfs_iput_and_deactive(lrp->inode);
> put_cred(lrp->cred);
> - kfree(calldata);
> + kfree(lrp);
> +}
> +
> +static void nfs4_layoutreturn_release(void *calldata)
> +{
> + struct nfs4_layoutreturn *lrp = calldata;
> +
> + nfs4_layoutreturn_cleanup(lrp, lrp->rpc_status);
> }
>
> static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
> @@ -10198,8 +10207,12 @@ int nfs4_proc_layoutreturn(struct
> nfs4_layoutreturn *lrp, unsigned int flags)
> nfs4_init_sequence(&lrp->args.seq_args, &lrp-
> >res.seq_res, 1,
> 0);
> task = rpc_run_task(&task_setup_data);
> - if (IS_ERR(task))
> - return PTR_ERR(task);
> + if (IS_ERR(task)) {
> + status = PTR_ERR(task);
> + trace_nfs4_layoutreturn(lrp->args.inode, &lrp-
> >args.stateid, status);
> + nfs4_layoutreturn_cleanup(lrp, status);
> + return status;
> + }
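NACK. The above introduces a use-after-free: when rpc_run_task() fails, the
RPC layer has already invoked the rpc_release callback
(nfs4_layoutreturn_release()) on the calldata, so lrp has been freed by the
time the error is returned. There is no need to call the release routine
after a call to rpc_run_task().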
> if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC))
> status = task->tk_status;
> trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid,
> status);
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index f157d43d1312..a489f43344b8 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1370,13 +1370,30 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr
> *lo,
> lrp->args.ld_private = &lrp->ld_private;
> lrp->clp = NFS_SERVER(ino)->nfs_client;
> lrp->cred = cred;
> - if (ld->prepare_layoutreturn)
> - ld->prepare_layoutreturn(&lrp->args);
> + if (ld->prepare_layoutreturn) {
> + status = ld->prepare_layoutreturn(&lrp->args);
> + if (status) {
> + pr_warn_ratelimited("NFS: pNFS layoutreturn
> prepare failed (%d) for layout driver %s\n",
> + status, ld->name ? ld->name :
> "unknown");
> + goto out_prepare_fail;
> + }
> + }
This is also unnecessary. The existing code will cope just fine with
args->ld_private being unset.
>
> status = nfs4_proc_layoutreturn(lrp, flags);
> out:
> dprintk("<-- %s status: %d\n", __func__, status);
> return status;
> +
> +out_prepare_fail:
> + pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid, &lrp-
> >args.range);
> + if (lrp->ld_private.ops && lrp->ld_private.ops->free)
> + lrp->ld_private.ops->free(&lrp->ld_private);
> + if (lrp->inode)
> + nfs_iput_and_deactive(lrp->inode);
> + put_cred(cred);
> + kfree(lrp);
> + pnfs_put_layout_hdr(lo);
> + return status;
> }
>
> /* Return true if layoutreturn is needed */
>
> base-commit: cb015814f8b6eebcbb8e46e111d108892c5e6821
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trondmy@kernel.org, trond.myklebust@hammerspace.com
© 2016 - 2025 Red Hat, Inc.