[RFC PATCH] nfs: use nfsi->rwsem to protect traversal of the file lock list

Yang Erkun posted 1 patch 1 month, 1 week ago
fs/nfs/delegation.c     |  9 ++++++++-
fs/nfs/nfs4proc.c       | 22 +++++++++++-----------
include/linux/nfs_xdr.h |  1 -
3 files changed, 19 insertions(+), 13 deletions(-)
[RFC PATCH] nfs: use nfsi->rwsem to protect traversal of the file lock list
Posted by Yang Erkun 1 month, 1 week ago
Lingfeng identified a bug and suggested two solutions, but both appear
to have issues.

Generally, flc_lock must not be released while iterating over the file
lock list; doing so can lead to use-after-free (UAF) of file locks.
However, functions like nfs_delegation_claim_locks and nfs4_reclaim_locks
cannot adhere to this rule because recover_lock or
nfs4_lock_delegation_recall may take a long time. To work around this,
NFS uses nfsi->rwsem to provide the same protection, and
nfs4_reclaim_locks already follows this approach.
Although nfs_delegation_claim_locks uses so_delegreturn_mutex instead,
this is inadequate since a single inode can have multiple nfs4_state
instances. Therefore, the fix is to also use nfsi->rwsem in this case.

Furthermore, after commit c69899a17ca4 ("NFSv4: Update of VFS byte range
lock must be atomic with the stateid update"), nfs4_locku_done and
nfs4_lock_done also break this rule because they call
locks_lock_inode_wait without holding nfsi->rwsem. Simply taking
nfsi->rwsem there could cause many deadlocks, so instead the call to
locks_lock_inode_wait is moved into _nfs4_proc_setlk. This is safe
because the race originally fixed by commit c69899a17ca4 ("NFSv4: Update
of VFS byte range lock must be atomic with the stateid update") no
longer exists after commit 0460253913e5 ("NFSv4: nfs4_do_open() is
incorrectly triggering state recovery"): all slots are drained before
calling nfs4_do_reclaim, which prevents concurrent stateid changes along
this path. Nor can nfs_delegation_claim_locks introduce such
concurrency, since when _nfs4_proc_setlk is called with
NFS_DELEGATED_STATE no RPC is sent and nfs4_lock_done is never invoked;
thus nfs4_lock_delegation_recall, called from
nfs_delegation_claim_locks, is the first place the stateid is set.

Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
Closes: https://lore.kernel.org/all/20250419085709.1452492-1-lilingfeng3@huawei.com/
Closes: https://lore.kernel.org/all/20250715030559.2906634-1-lilingfeng3@huawei.com/
Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
---
 fs/nfs/delegation.c     |  9 ++++++++-
 fs/nfs/nfs4proc.c       | 22 +++++++++++-----------
 include/linux/nfs_xdr.h |  1 -
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 122fb3f14ffb..9546d2195c25 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -173,6 +173,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
 static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct inode *inode = state->inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
 	struct file_lock *fl;
 	struct file_lock_context *flctx = locks_inode_context(inode);
 	struct list_head *list;
@@ -182,6 +183,9 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
 		goto out;
 
 	list = &flctx->flc_posix;
+
+	/* Guard against reclaim and new lock/unlock calls */
+	down_write(&nfsi->rwsem);
 	spin_lock(&flctx->flc_lock);
 restart:
 	for_each_file_lock(fl, list) {
@@ -189,8 +193,10 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
 			continue;
 		spin_unlock(&flctx->flc_lock);
 		status = nfs4_lock_delegation_recall(fl, state, stateid);
-		if (status < 0)
+		if (status < 0) {
+			up_write(&nfsi->rwsem);
 			goto out;
+		}
 		spin_lock(&flctx->flc_lock);
 	}
 	if (list == &flctx->flc_posix) {
@@ -198,6 +204,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
 		goto restart;
 	}
 	spin_unlock(&flctx->flc_lock);
+	up_write(&nfsi->rwsem);
 out:
 	return status;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 91bcf67bd743..9d6fbca8798b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7076,7 +7076,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 	switch (task->tk_status) {
 		case 0:
 			renew_lease(calldata->server, calldata->timestamp);
-			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
 			if (nfs4_update_lock_stateid(calldata->lsp,
 					&calldata->res.stateid))
 				break;
@@ -7344,11 +7343,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 	case 0:
 		renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
 				data->timestamp);
-		if (data->arg.new_lock && !data->cancelled) {
-			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
-			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
-				goto out_restart;
-		}
 		if (data->arg.new_lock_owner != 0) {
 			nfs_confirm_seqid(&lsp->ls_seqid, 0);
 			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
@@ -7459,11 +7453,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 	msg.rpc_argp = &data->arg;
 	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
-	if (recovery_type > NFS_LOCK_NEW) {
-		if (recovery_type == NFS_LOCK_RECLAIM)
-			data->arg.reclaim = NFS_LOCK_RECLAIM;
-	} else
-		data->arg.new_lock = 1;
+
+	if (recovery_type == NFS_LOCK_RECLAIM)
+		data->arg.reclaim = NFS_LOCK_RECLAIM;
+
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -7573,6 +7566,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	up_read(&nfsi->rwsem);
 	mutex_unlock(&sp->so_delegreturn_mutex);
 	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+	if (status)
+		goto out;
+
+	down_read(&nfsi->rwsem);
+	request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
+	status = locks_lock_inode_wait(state->inode, request);
+	up_read(&nfsi->rwsem);
 out:
 	request->c.flc_flags = flags;
 	return status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ff1f12aa73d2..9599ad15c3ad 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -580,7 +580,6 @@ struct nfs_lock_args {
 	struct nfs_lowner	lock_owner;
 	unsigned char		block : 1;
 	unsigned char		reclaim : 1;
-	unsigned char		new_lock : 1;
 	unsigned char		new_lock_owner : 1;
 };
 
-- 
2.39.2
Re: [RFC PATCH] nfs: use nfsi->rwsem to protect traversal of the file lock list
Posted by Jeff Layton 4 weeks ago
On Thu, 2026-02-26 at 09:22 +0800, Yang Erkun wrote:
> Lingfeng identified a bug and suggested two solutions, but both appear
> to have issues.
> 
> Generally, we cannot release flc_lock while iterating over the file lock
> list to avoid use-after-free (UAF) problems with file locks. However,
> functions like nfs_delegation_claim_locks and nfs4_reclaim_locks cannot
> adhere to this rule because recover_lock or nfs4_lock_delegation_recall
> may take a long time. To resolve this, NFS switches to using nfsi->rwsem
> for the same protection, and nfs_reclaim_locks follows this approach.
> Although nfs_delegation_claim_locks uses so_delegreturn_mutex instead,
> this is inadequate since a single inode can have multiple nfs4_state
> instances. Therefore, the fix is to also use nfsi->rwsem in this case.
> 
> Furthermore, after commit c69899a17ca4 ("NFSv4: Update of VFS byte range
> lock must be atomic with the stateid update"), the functions
> nfs4_locku_done and nfs4_lock_done also break this rule because they
> call locks_lock_inode_wait without holding nfsi->rwsem. Simply adding
> this protection could cause many deadlocks, so instead, the call to
> locks_lock_inode_wait is moved into _nfs4_proc_setlk. Regarding the bug
> fixed by commit c69899a17ca4 ("NFSv4: Update of VFS byte range
> lock must be atomic with the stateid update"), it has been resolved
> after commit 0460253913e5 ("NFSv4: nfs4_do_open() is incorrectly triggering
> state recovery") because all slots are drained before calling
> nfs4_do_reclaim, which prevents concurrent stateid changes along this path.
> Also, nfs_delegation_claim_locks does not cause this concurrency either
> since when _nfs4_proc_setlk is called with NFS_DELEGATED_STATE, no RPC is
> sent, so nfs4_lock_done is not called. Therefore,
> nfs4_lock_delegation_recall from nfs_delegation_claim_locks is the first
> time the stateid is set.
> 
> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
> Closes: https://lore.kernel.org/all/20250419085709.1452492-1-lilingfeng3@huawei.com/
> Closes: https://lore.kernel.org/all/20250715030559.2906634-1-lilingfeng3@huawei.com/
> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
> ---
>  fs/nfs/delegation.c     |  9 ++++++++-
>  fs/nfs/nfs4proc.c       | 22 +++++++++++-----------
>  include/linux/nfs_xdr.h |  1 -
>  3 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> index 122fb3f14ffb..9546d2195c25 100644
> --- a/fs/nfs/delegation.c
> +++ b/fs/nfs/delegation.c
> @@ -173,6 +173,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
>  static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
>  {
>  	struct inode *inode = state->inode;
> +	struct nfs_inode *nfsi = NFS_I(inode);
>  	struct file_lock *fl;
>  	struct file_lock_context *flctx = locks_inode_context(inode);
>  	struct list_head *list;
> @@ -182,6 +183,9 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>  		goto out;
>  
>  	list = &flctx->flc_posix;
> +
> +	/* Guard against reclaim and new lock/unlock calls */
> +	down_write(&nfsi->rwsem);
>  	spin_lock(&flctx->flc_lock);
>  restart:
>  	for_each_file_lock(fl, list) {
> @@ -189,8 +193,10 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>  			continue;
>  		spin_unlock(&flctx->flc_lock);
>  		status = nfs4_lock_delegation_recall(fl, state, stateid);
> -		if (status < 0)
> +		if (status < 0) {
> +			up_write(&nfsi->rwsem);
>  			goto out;
> +		}
>  		spin_lock(&flctx->flc_lock);
>  	}
>  	if (list == &flctx->flc_posix) {
> @@ -198,6 +204,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>  		goto restart;
>  	}
>  	spin_unlock(&flctx->flc_lock);
> +	up_write(&nfsi->rwsem);
>  out:
>  	return status;
>  }
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 91bcf67bd743..9d6fbca8798b 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -7076,7 +7076,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
>  	switch (task->tk_status) {
>  		case 0:
>  			renew_lease(calldata->server, calldata->timestamp);
> -			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
>  			if (nfs4_update_lock_stateid(calldata->lsp,
>  					&calldata->res.stateid))
>  				break;
> @@ -7344,11 +7343,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>  	case 0:
>  		renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
>  				data->timestamp);
> -		if (data->arg.new_lock && !data->cancelled) {
> -			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
> -			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
> -				goto out_restart;
> -		}
>  		if (data->arg.new_lock_owner != 0) {
>  			nfs_confirm_seqid(&lsp->ls_seqid, 0);
>  			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
> @@ -7459,11 +7453,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
>  	msg.rpc_argp = &data->arg;
>  	msg.rpc_resp = &data->res;
>  	task_setup_data.callback_data = data;
> -	if (recovery_type > NFS_LOCK_NEW) {
> -		if (recovery_type == NFS_LOCK_RECLAIM)
> -			data->arg.reclaim = NFS_LOCK_RECLAIM;
> -	} else
> -		data->arg.new_lock = 1;
> +
> +	if (recovery_type == NFS_LOCK_RECLAIM)
> +		data->arg.reclaim = NFS_LOCK_RECLAIM;
> +
>  	task = rpc_run_task(&task_setup_data);
>  	if (IS_ERR(task))
>  		return PTR_ERR(task);
> @@ -7573,6 +7566,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
>  	up_read(&nfsi->rwsem);
>  	mutex_unlock(&sp->so_delegreturn_mutex);
>  	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
> +	if (status)
> +		goto out;
> +
> +	down_read(&nfsi->rwsem);
> +	request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
> +	status = locks_lock_inode_wait(state->inode, request);
> +	up_read(&nfsi->rwsem);
>  out:
>  	request->c.flc_flags = flags;
>  	return status;
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index ff1f12aa73d2..9599ad15c3ad 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -580,7 +580,6 @@ struct nfs_lock_args {
>  	struct nfs_lowner	lock_owner;
>  	unsigned char		block : 1;
>  	unsigned char		reclaim : 1;
> -	unsigned char		new_lock : 1;
>  	unsigned char		new_lock_owner : 1;
>  };
>  

FWIW, I did point Claude at this too and it found no regressions. The
commit log was a bit hard to parse, Claude's summary is here:

This patch fixes a use-after-free bug in NFS file lock list traversal.
The core problem was that nfs_delegation_claim_locks released
flctx->flc_lock during iteration (to call nfs4_lock_delegation_recall,
which can block on RPCs) and relied on so_delegreturn_mutex for
protection — but that mutex is per-state-owner, not per-inode, so
multiple state owners on the same inode could race.

The fix uses nfsi->rwsem (per-inode) for proper protection, matching
the existing pattern in nfs4_reclaim_locks. It also moves
locks_lock_inode_wait out of RPC callbacks (nfs4_lock_done,
nfs4_locku_done) into the synchronous caller (_nfs4_proc_setlk) so it
can be called under nfsi->rwsem.                                      

-- 
Jeff Layton <jlayton@kernel.org>
Re: [RFC PATCH] nfs: use nfsi->rwsem to protect traversal of the file lock list
Posted by Jeff Layton 4 weeks ago
On Thu, 2026-02-26 at 09:22 +0800, Yang Erkun wrote:
> Lingfeng identified a bug and suggested two solutions, but both appear
> to have issues.
> 
> Generally, we cannot release flc_lock while iterating over the file lock
> list to avoid use-after-free (UAF) problems with file locks. However,
> functions like nfs_delegation_claim_locks and nfs4_reclaim_locks cannot
> adhere to this rule because recover_lock or nfs4_lock_delegation_recall
> may take a long time. To resolve this, NFS switches to using nfsi->rwsem
> for the same protection, and nfs_reclaim_locks follows this approach.
> Although nfs_delegation_claim_locks uses so_delegreturn_mutex instead,
> this is inadequate since a single inode can have multiple nfs4_state
> instances. Therefore, the fix is to also use nfsi->rwsem in this case.
> 
> Furthermore, after commit c69899a17ca4 ("NFSv4: Update of VFS byte range
> lock must be atomic with the stateid update"), the functions
> nfs4_locku_done and nfs4_lock_done also break this rule because they
> call locks_lock_inode_wait without holding nfsi->rwsem. Simply adding
> this protection could cause many deadlocks, so instead, the call to
> locks_lock_inode_wait is moved into _nfs4_proc_setlk. Regarding the bug
> fixed by commit c69899a17ca4 ("NFSv4: Update of VFS byte range
> lock must be atomic with the stateid update"), it has been resolved
> after commit 0460253913e5 ("NFSv4: nfs4_do_open() is incorrectly triggering
> state recovery") because all slots are drained before calling
> nfs4_do_reclaim, which prevents concurrent stateid changes along this path.
> Also, nfs_delegation_claim_locks does not cause this concurrency either
> since when _nfs4_proc_setlk is called with NFS_DELEGATED_STATE, no RPC is
> sent, so nfs4_lock_done is not called. Therefore,
> nfs4_lock_delegation_recall from nfs_delegation_claim_locks is the first
> time the stateid is set.
> 
> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
> Closes: https://lore.kernel.org/all/20250419085709.1452492-1-lilingfeng3@huawei.com/
> Closes: https://lore.kernel.org/all/20250715030559.2906634-1-lilingfeng3@huawei.com/
> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
> ---
>  fs/nfs/delegation.c     |  9 ++++++++-
>  fs/nfs/nfs4proc.c       | 22 +++++++++++-----------
>  include/linux/nfs_xdr.h |  1 -
>  3 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> index 122fb3f14ffb..9546d2195c25 100644
> --- a/fs/nfs/delegation.c
> +++ b/fs/nfs/delegation.c
> @@ -173,6 +173,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
>  static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
>  {
>  	struct inode *inode = state->inode;
> +	struct nfs_inode *nfsi = NFS_I(inode);
>  	struct file_lock *fl;
>  	struct file_lock_context *flctx = locks_inode_context(inode);
>  	struct list_head *list;
> @@ -182,6 +183,9 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>  		goto out;
>  
>  	list = &flctx->flc_posix;
> +
> +	/* Guard against reclaim and new lock/unlock calls */
> +	down_write(&nfsi->rwsem);
>  	spin_lock(&flctx->flc_lock);
>  restart:
>  	for_each_file_lock(fl, list) {
> @@ -189,8 +193,10 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>  			continue;
>  		spin_unlock(&flctx->flc_lock);
>  		status = nfs4_lock_delegation_recall(fl, state, stateid);
> -		if (status < 0)
> +		if (status < 0) {
> +			up_write(&nfsi->rwsem);
>  			goto out;
> +		}
>  		spin_lock(&flctx->flc_lock);
>  	}
>  	if (list == &flctx->flc_posix) {
> @@ -198,6 +204,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>  		goto restart;
>  	}
>  	spin_unlock(&flctx->flc_lock);
> +	up_write(&nfsi->rwsem);
>  out:
>  	return status;
>  }
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 91bcf67bd743..9d6fbca8798b 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -7076,7 +7076,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
>  	switch (task->tk_status) {
>  		case 0:
>  			renew_lease(calldata->server, calldata->timestamp);
> -			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
>  			if (nfs4_update_lock_stateid(calldata->lsp,
>  					&calldata->res.stateid))
>  				break;
> @@ -7344,11 +7343,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>  	case 0:
>  		renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
>  				data->timestamp);
> -		if (data->arg.new_lock && !data->cancelled) {
> -			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
> -			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
> -				goto out_restart;
> -		}
>  		if (data->arg.new_lock_owner != 0) {
>  			nfs_confirm_seqid(&lsp->ls_seqid, 0);
>  			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
> @@ -7459,11 +7453,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
>  	msg.rpc_argp = &data->arg;
>  	msg.rpc_resp = &data->res;
>  	task_setup_data.callback_data = data;
> -	if (recovery_type > NFS_LOCK_NEW) {
> -		if (recovery_type == NFS_LOCK_RECLAIM)
> -			data->arg.reclaim = NFS_LOCK_RECLAIM;
> -	} else
> -		data->arg.new_lock = 1;
> +
> +	if (recovery_type == NFS_LOCK_RECLAIM)
> +		data->arg.reclaim = NFS_LOCK_RECLAIM;
> +
>  	task = rpc_run_task(&task_setup_data);
>  	if (IS_ERR(task))
>  		return PTR_ERR(task);
> @@ -7573,6 +7566,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
>  	up_read(&nfsi->rwsem);
>  	mutex_unlock(&sp->so_delegreturn_mutex);
>  	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
> +	if (status)
> +		goto out;
> +
> +	down_read(&nfsi->rwsem);
> +	request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
> +	status = locks_lock_inode_wait(state->inode, request);
> +	up_read(&nfsi->rwsem);
>  out:
>  	request->c.flc_flags = flags;
>  	return status;
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index ff1f12aa73d2..9599ad15c3ad 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -580,7 +580,6 @@ struct nfs_lock_args {
>  	struct nfs_lowner	lock_owner;
>  	unsigned char		block : 1;
>  	unsigned char		reclaim : 1;
> -	unsigned char		new_lock : 1;
>  	unsigned char		new_lock_owner : 1;
>  };
>  

Nice work!

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Re: [RFC PATCH] nfs: use nfsi->rwsem to protect traversal of the file lock list
Posted by yangerkun 4 weeks ago
在 2026/3/9 22:09, Jeff Layton 写道:
> On Thu, 2026-02-26 at 09:22 +0800, Yang Erkun wrote:
>> Lingfeng identified a bug and suggested two solutions, but both appear
>> to have issues.
>>
>> Generally, we cannot release flc_lock while iterating over the file lock
>> list to avoid use-after-free (UAF) problems with file locks. However,
>> functions like nfs_delegation_claim_locks and nfs4_reclaim_locks cannot
>> adhere to this rule because recover_lock or nfs4_lock_delegation_recall
>> may take a long time. To resolve this, NFS switches to using nfsi->rwsem
>> for the same protection, and nfs_reclaim_locks follows this approach.
>> Although nfs_delegation_claim_locks uses so_delegreturn_mutex instead,
>> this is inadequate since a single inode can have multiple nfs4_state
>> instances. Therefore, the fix is to also use nfsi->rwsem in this case.
>>
>> Furthermore, after commit c69899a17ca4 ("NFSv4: Update of VFS byte range
>> lock must be atomic with the stateid update"), the functions
>> nfs4_locku_done and nfs4_lock_done also break this rule because they
>> call locks_lock_inode_wait without holding nfsi->rwsem. Simply adding
>> this protection could cause many deadlocks, so instead, the call to
>> locks_lock_inode_wait is moved into _nfs4_proc_setlk. Regarding the bug
>> fixed by commit c69899a17ca4 ("NFSv4: Update of VFS byte range
>> lock must be atomic with the stateid update"), it has been resolved
>> after commit 0460253913e5 ("NFSv4: nfs4_do_open() is incorrectly triggering
>> state recovery") because all slots are drained before calling
>> nfs4_do_reclaim, which prevents concurrent stateid changes along this path.
>> Also, nfs_delegation_claim_locks does not cause this concurrency either
>> since when _nfs4_proc_setlk is called with NFS_DELEGATED_STATE, no RPC is
>> sent, so nfs4_lock_done is not called. Therefore,
>> nfs4_lock_delegation_recall from nfs_delegation_claim_locks is the first
>> time the stateid is set.
>>
>> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
>> Closes: https://lore.kernel.org/all/20250419085709.1452492-1-lilingfeng3@huawei.com/
>> Closes: https://lore.kernel.org/all/20250715030559.2906634-1-lilingfeng3@huawei.com/
>> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
>> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
>> ---
>>   fs/nfs/delegation.c     |  9 ++++++++-
>>   fs/nfs/nfs4proc.c       | 22 +++++++++++-----------
>>   include/linux/nfs_xdr.h |  1 -
>>   3 files changed, 19 insertions(+), 13 deletions(-)
>>
>> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
>> index 122fb3f14ffb..9546d2195c25 100644
>> --- a/fs/nfs/delegation.c
>> +++ b/fs/nfs/delegation.c
>> @@ -173,6 +173,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
>>   static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
>>   {
>>   	struct inode *inode = state->inode;
>> +	struct nfs_inode *nfsi = NFS_I(inode);
>>   	struct file_lock *fl;
>>   	struct file_lock_context *flctx = locks_inode_context(inode);
>>   	struct list_head *list;
>> @@ -182,6 +183,9 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>>   		goto out;
>>   
>>   	list = &flctx->flc_posix;
>> +
>> +	/* Guard against reclaim and new lock/unlock calls */
>> +	down_write(&nfsi->rwsem);
>>   	spin_lock(&flctx->flc_lock);
>>   restart:
>>   	for_each_file_lock(fl, list) {
>> @@ -189,8 +193,10 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>>   			continue;
>>   		spin_unlock(&flctx->flc_lock);
>>   		status = nfs4_lock_delegation_recall(fl, state, stateid);
>> -		if (status < 0)
>> +		if (status < 0) {
>> +			up_write(&nfsi->rwsem);
>>   			goto out;
>> +		}
>>   		spin_lock(&flctx->flc_lock);
>>   	}
>>   	if (list == &flctx->flc_posix) {
>> @@ -198,6 +204,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>>   		goto restart;
>>   	}
>>   	spin_unlock(&flctx->flc_lock);
>> +	up_write(&nfsi->rwsem);
>>   out:
>>   	return status;
>>   }
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 91bcf67bd743..9d6fbca8798b 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -7076,7 +7076,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
>>   	switch (task->tk_status) {
>>   		case 0:
>>   			renew_lease(calldata->server, calldata->timestamp);
>> -			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
>>   			if (nfs4_update_lock_stateid(calldata->lsp,
>>   					&calldata->res.stateid))
>>   				break;
>> @@ -7344,11 +7343,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>>   	case 0:
>>   		renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
>>   				data->timestamp);
>> -		if (data->arg.new_lock && !data->cancelled) {
>> -			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
>> -			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
>> -				goto out_restart;
>> -		}
>>   		if (data->arg.new_lock_owner != 0) {
>>   			nfs_confirm_seqid(&lsp->ls_seqid, 0);
>>   			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
>> @@ -7459,11 +7453,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
>>   	msg.rpc_argp = &data->arg;
>>   	msg.rpc_resp = &data->res;
>>   	task_setup_data.callback_data = data;
>> -	if (recovery_type > NFS_LOCK_NEW) {
>> -		if (recovery_type == NFS_LOCK_RECLAIM)
>> -			data->arg.reclaim = NFS_LOCK_RECLAIM;
>> -	} else
>> -		data->arg.new_lock = 1;
>> +
>> +	if (recovery_type == NFS_LOCK_RECLAIM)
>> +		data->arg.reclaim = NFS_LOCK_RECLAIM;
>> +
>>   	task = rpc_run_task(&task_setup_data);
>>   	if (IS_ERR(task))
>>   		return PTR_ERR(task);
>> @@ -7573,6 +7566,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
>>   	up_read(&nfsi->rwsem);
>>   	mutex_unlock(&sp->so_delegreturn_mutex);
>>   	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
>> +	if (status)
>> +		goto out;
>> +
>> +	down_read(&nfsi->rwsem);
>> +	request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
>> +	status = locks_lock_inode_wait(state->inode, request);
>> +	up_read(&nfsi->rwsem);
>>   out:
>>   	request->c.flc_flags = flags;
>>   	return status;
>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
>> index ff1f12aa73d2..9599ad15c3ad 100644
>> --- a/include/linux/nfs_xdr.h
>> +++ b/include/linux/nfs_xdr.h
>> @@ -580,7 +580,6 @@ struct nfs_lock_args {
>>   	struct nfs_lowner	lock_owner;
>>   	unsigned char		block : 1;
>>   	unsigned char		reclaim : 1;
>> -	unsigned char		new_lock : 1;
>>   	unsigned char		new_lock_owner : 1;
>>   };
>>   
> 
> Nice work!
> 
> Reviewed-by: Jeff Layton <jlayton@kernel.org>

Hi Jeff, thanks a lot for your review!

> 
> 

Re: [RFC PATCH] nfs: use nfsi->rwsem to protect traversal of the file lock list
Posted by yangerkun 1 month, 1 week ago
Hi all,

This issue has been known for a long time now, and I am really looking 
forward to some discussion on this problem and the solution I have proposed.

Thanks,
Erkun.

在 2026/2/26 9:22, Yang Erkun 写道:
> Lingfeng identified a bug and suggested two solutions, but both appear
> to have issues.
> 
> Generally, we cannot release flc_lock while iterating over the file lock
> list to avoid use-after-free (UAF) problems with file locks. However,
> functions like nfs_delegation_claim_locks and nfs4_reclaim_locks cannot
> adhere to this rule because recover_lock or nfs4_lock_delegation_recall
> may take a long time. To resolve this, NFS switches to using nfsi->rwsem
> for the same protection, and nfs_reclaim_locks follows this approach.
> Although nfs_delegation_claim_locks uses so_delegreturn_mutex instead,
> this is inadequate since a single inode can have multiple nfs4_state
> instances. Therefore, the fix is to also use nfsi->rwsem in this case.
> 
> Furthermore, after commit c69899a17ca4 ("NFSv4: Update of VFS byte range
> lock must be atomic with the stateid update"), the functions
> nfs4_locku_done and nfs4_lock_done also break this rule because they
> call locks_lock_inode_wait without holding nfsi->rwsem. Simply adding
> this protection could cause many deadlocks, so instead, the call to
> locks_lock_inode_wait is moved into _nfs4_proc_setlk. Regarding the bug
> fixed by commit c69899a17ca4 ("NFSv4: Update of VFS byte range
> lock must be atomic with the stateid update"), it has been resolved
> after commit 0460253913e5 ("NFSv4: nfs4_do_open() is incorrectly triggering
> state recovery") because all slots are drained before calling
> nfs4_do_reclaim, which prevents concurrent stateid changes along this path.
> Also, nfs_delegation_claim_locks does not cause this concurrency either
> since when _nfs4_proc_setlk is called with NFS_DELEGATED_STATE, no RPC is
> sent, so nfs4_lock_done is not called. Therefore,
> nfs4_lock_delegation_recall from nfs_delegation_claim_locks is the first
> time the stateid is set.
> 
> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
> Closes: https://lore.kernel.org/all/20250419085709.1452492-1-lilingfeng3@huawei.com/
> Closes: https://lore.kernel.org/all/20250715030559.2906634-1-lilingfeng3@huawei.com/
> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
> ---
>   fs/nfs/delegation.c     |  9 ++++++++-
>   fs/nfs/nfs4proc.c       | 22 +++++++++++-----------
>   include/linux/nfs_xdr.h |  1 -
>   3 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> index 122fb3f14ffb..9546d2195c25 100644
> --- a/fs/nfs/delegation.c
> +++ b/fs/nfs/delegation.c
> @@ -173,6 +173,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
>   static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
>   {
>   	struct inode *inode = state->inode;
> +	struct nfs_inode *nfsi = NFS_I(inode);
>   	struct file_lock *fl;
>   	struct file_lock_context *flctx = locks_inode_context(inode);
>   	struct list_head *list;
> @@ -182,6 +183,9 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>   		goto out;
>   
>   	list = &flctx->flc_posix;
> +
> +	/* Guard against reclaim and new lock/unlock calls */
> +	down_write(&nfsi->rwsem);
>   	spin_lock(&flctx->flc_lock);
>   restart:
>   	for_each_file_lock(fl, list) {
> @@ -189,8 +193,10 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>   			continue;
>   		spin_unlock(&flctx->flc_lock);
>   		status = nfs4_lock_delegation_recall(fl, state, stateid);
> -		if (status < 0)
> +		if (status < 0) {
> +			up_write(&nfsi->rwsem);
>   			goto out;
> +		}
>   		spin_lock(&flctx->flc_lock);
>   	}
>   	if (list == &flctx->flc_posix) {
> @@ -198,6 +204,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>   		goto restart;
>   	}
>   	spin_unlock(&flctx->flc_lock);
> +	up_write(&nfsi->rwsem);
>   out:
>   	return status;
>   }
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 91bcf67bd743..9d6fbca8798b 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -7076,7 +7076,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
>   	switch (task->tk_status) {
>   		case 0:
>   			renew_lease(calldata->server, calldata->timestamp);
> -			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
>   			if (nfs4_update_lock_stateid(calldata->lsp,
>   					&calldata->res.stateid))
>   				break;
> @@ -7344,11 +7343,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>   	case 0:
>   		renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
>   				data->timestamp);
> -		if (data->arg.new_lock && !data->cancelled) {
> -			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
> -			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
> -				goto out_restart;
> -		}
>   		if (data->arg.new_lock_owner != 0) {
>   			nfs_confirm_seqid(&lsp->ls_seqid, 0);
>   			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
> @@ -7459,11 +7453,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
>   	msg.rpc_argp = &data->arg;
>   	msg.rpc_resp = &data->res;
>   	task_setup_data.callback_data = data;
> -	if (recovery_type > NFS_LOCK_NEW) {
> -		if (recovery_type == NFS_LOCK_RECLAIM)
> -			data->arg.reclaim = NFS_LOCK_RECLAIM;
> -	} else
> -		data->arg.new_lock = 1;
> +
> +	if (recovery_type == NFS_LOCK_RECLAIM)
> +		data->arg.reclaim = NFS_LOCK_RECLAIM;
> +
>   	task = rpc_run_task(&task_setup_data);
>   	if (IS_ERR(task))
>   		return PTR_ERR(task);
> @@ -7573,6 +7566,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
>   	up_read(&nfsi->rwsem);
>   	mutex_unlock(&sp->so_delegreturn_mutex);
>   	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
> +	if (status)
> +		goto out;
> +
> +	down_read(&nfsi->rwsem);
> +	request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
> +	status = locks_lock_inode_wait(state->inode, request);
> +	up_read(&nfsi->rwsem);
>   out:
>   	request->c.flc_flags = flags;
>   	return status;
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index ff1f12aa73d2..9599ad15c3ad 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -580,7 +580,6 @@ struct nfs_lock_args {
>   	struct nfs_lowner	lock_owner;
>   	unsigned char		block : 1;
>   	unsigned char		reclaim : 1;
> -	unsigned char		new_lock : 1;
>   	unsigned char		new_lock_owner : 1;
>   };
>   

Re: [RFC PATCH] nfs: use nfsi->rwsem to protect traversal of the file lock list
Posted by yangerkun 4 weeks ago
ping again

在 2026/2/26 16:34, yangerkun 写道:
> Hi all,
> 
> This issue has been known for a long time now, and I am really looking 
> forward to some discussion on this problem and the solution I have 
> proposed.
> 
> Thanks,
> Erkun.
> 
> 在 2026/2/26 9:22, Yang Erkun 写道:
>> Lingfeng identified a bug and suggested two solutions, but both appear
>> to have issues.
>>
>> Generally, we cannot release flc_lock while iterating over the file lock
>> list to avoid use-after-free (UAF) problems with file locks. However,
>> functions like nfs_delegation_claim_locks and nfs4_reclaim_locks cannot
>> adhere to this rule because recover_lock or nfs4_lock_delegation_recall
>> may take a long time. To resolve this, NFS switches to using nfsi->rwsem
>> for the same protection, and nfs4_reclaim_locks follows this approach.
>> Although nfs_delegation_claim_locks uses so_delegreturn_mutex instead,
>> this is inadequate since a single inode can have multiple nfs4_state
>> instances. Therefore, the fix is to also use nfsi->rwsem in this case.
>>
>> Furthermore, after commit c69899a17ca4 ("NFSv4: Update of VFS byte range
>> lock must be atomic with the stateid update"), the functions
>> nfs4_locku_done and nfs4_lock_done also break this rule because they
>> call locks_lock_inode_wait without holding nfsi->rwsem. Simply adding
>> this protection could cause many deadlocks, so instead, the call to
>> locks_lock_inode_wait is moved into _nfs4_proc_setlk. Regarding the bug
>> fixed by commit c69899a17ca4 ("NFSv4: Update of VFS byte range
>> lock must be atomic with the stateid update"), it has been resolved
>> after commit 0460253913e5 ("NFSv4: nfs4_do_open() is incorrectly 
>> triggering
>> state recovery") because all slots are drained before calling
>> nfs4_do_reclaim, which prevents concurrent stateid changes along this 
>> path.
>> Also, nfs_delegation_claim_locks does not cause this concurrency either
>> since when _nfs4_proc_setlk is called with NFS_DELEGATED_STATE, no RPC is
>> sent, so nfs4_lock_done is not called. Therefore,
>> nfs4_lock_delegation_recall from nfs_delegation_claim_locks is the first
>> time the stateid is set.
>>
>> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
>> Closes: https://lore.kernel.org/all/20250419085709.1452492-1-lilingfeng3@huawei.com/
>> Closes: https://lore.kernel.org/all/20250715030559.2906634-1-lilingfeng3@huawei.com/
>> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be 
>> atomic with the stateid update")
>> Signed-off-by: Yang Erkun <yangerkun@huawei.com>
>> ---
>>   fs/nfs/delegation.c     |  9 ++++++++-
>>   fs/nfs/nfs4proc.c       | 22 +++++++++++-----------
>>   include/linux/nfs_xdr.h |  1 -
>>   3 files changed, 19 insertions(+), 13 deletions(-)
>>
>> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
>> index 122fb3f14ffb..9546d2195c25 100644
>> --- a/fs/nfs/delegation.c
>> +++ b/fs/nfs/delegation.c
>> @@ -173,6 +173,7 @@ int nfs4_check_delegation(struct inode *inode, 
>> fmode_t type)
>>   static int nfs_delegation_claim_locks(struct nfs4_state *state, 
>> const nfs4_stateid *stateid)
>>   {
>>       struct inode *inode = state->inode;
>> +    struct nfs_inode *nfsi = NFS_I(inode);
>>       struct file_lock *fl;
>>       struct file_lock_context *flctx = locks_inode_context(inode);
>>       struct list_head *list;
>> @@ -182,6 +183,9 @@ static int nfs_delegation_claim_locks(struct 
>> nfs4_state *state, const nfs4_state
>>           goto out;
>>       list = &flctx->flc_posix;
>> +
>> +    /* Guard against reclaim and new lock/unlock calls */
>> +    down_write(&nfsi->rwsem);
>>       spin_lock(&flctx->flc_lock);
>>   restart:
>>       for_each_file_lock(fl, list) {
>> @@ -189,8 +193,10 @@ static int nfs_delegation_claim_locks(struct 
>> nfs4_state *state, const nfs4_state
>>               continue;
>>           spin_unlock(&flctx->flc_lock);
>>           status = nfs4_lock_delegation_recall(fl, state, stateid);
>> -        if (status < 0)
>> +        if (status < 0) {
>> +            up_write(&nfsi->rwsem);
>>               goto out;
>> +        }
>>           spin_lock(&flctx->flc_lock);
>>       }
>>       if (list == &flctx->flc_posix) {
>> @@ -198,6 +204,7 @@ static int nfs_delegation_claim_locks(struct 
>> nfs4_state *state, const nfs4_state
>>           goto restart;
>>       }
>>       spin_unlock(&flctx->flc_lock);
>> +    up_write(&nfsi->rwsem);
>>   out:
>>       return status;
>>   }
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 91bcf67bd743..9d6fbca8798b 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -7076,7 +7076,6 @@ static void nfs4_locku_done(struct rpc_task 
>> *task, void *data)
>>       switch (task->tk_status) {
>>           case 0:
>>               renew_lease(calldata->server, calldata->timestamp);
>> -            locks_lock_inode_wait(calldata->lsp->ls_state->inode, 
>> &calldata->fl);
>>               if (nfs4_update_lock_stateid(calldata->lsp,
>>                       &calldata->res.stateid))
>>                   break;
>> @@ -7344,11 +7343,6 @@ static void nfs4_lock_done(struct rpc_task 
>> *task, void *calldata)
>>       case 0:
>>           renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
>>                   data->timestamp);
>> -        if (data->arg.new_lock && !data->cancelled) {
>> -            data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
>> -            if (locks_lock_inode_wait(lsp->ls_state->inode, &data- 
>> >fl) < 0)
>> -                goto out_restart;
>> -        }
>>           if (data->arg.new_lock_owner != 0) {
>>               nfs_confirm_seqid(&lsp->ls_seqid, 0);
>>               nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
>> @@ -7459,11 +7453,10 @@ static int _nfs4_do_setlk(struct nfs4_state 
>> *state, int cmd, struct file_lock *f
>>       msg.rpc_argp = &data->arg;
>>       msg.rpc_resp = &data->res;
>>       task_setup_data.callback_data = data;
>> -    if (recovery_type > NFS_LOCK_NEW) {
>> -        if (recovery_type == NFS_LOCK_RECLAIM)
>> -            data->arg.reclaim = NFS_LOCK_RECLAIM;
>> -    } else
>> -        data->arg.new_lock = 1;
>> +
>> +    if (recovery_type == NFS_LOCK_RECLAIM)
>> +        data->arg.reclaim = NFS_LOCK_RECLAIM;
>> +
>>       task = rpc_run_task(&task_setup_data);
>>       if (IS_ERR(task))
>>           return PTR_ERR(task);
>> @@ -7573,6 +7566,13 @@ static int _nfs4_proc_setlk(struct nfs4_state 
>> *state, int cmd, struct file_lock
>>       up_read(&nfsi->rwsem);
>>       mutex_unlock(&sp->so_delegreturn_mutex);
>>       status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
>> +    if (status)
>> +        goto out;
>> +
>> +    down_read(&nfsi->rwsem);
>> +    request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
>> +    status = locks_lock_inode_wait(state->inode, request);
>> +    up_read(&nfsi->rwsem);
>>   out:
>>       request->c.flc_flags = flags;
>>       return status;
>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
>> index ff1f12aa73d2..9599ad15c3ad 100644
>> --- a/include/linux/nfs_xdr.h
>> +++ b/include/linux/nfs_xdr.h
>> @@ -580,7 +580,6 @@ struct nfs_lock_args {
>>       struct nfs_lowner    lock_owner;
>>       unsigned char        block : 1;
>>       unsigned char        reclaim : 1;
>> -    unsigned char        new_lock : 1;
>>       unsigned char        new_lock_owner : 1;
>>   };
> 
> 
>