[PATCH v2] nfs: fix the race of lock/unlock and open

Li Lingfeng posted 1 patch 6 months, 3 weeks ago
fs/nfs/delegation.c | 5 ++++-
fs/nfs/nfs4proc.c   | 8 +++++++-
2 files changed, 11 insertions(+), 2 deletions(-)
[PATCH v2] nfs: fix the race of lock/unlock and open
Posted by Li Lingfeng 6 months, 3 weeks ago
LOCK may extend an existing lock and release another one and UNLOCK may
also release an existing lock.
When opening a file, there may be access to file locks that have been
concurrently released by lock/unlock operations, potentially triggering
UAF.
While certain concurrent scenarios involving lock/unlock and open
operations have been safeguarded with locks – for example,
nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
locks_lock_inode_wait() – there remain cases where such protection is not
yet implemented.

The issue can be reproduced through the following steps:
T1: open in read-only mode with three consecutive lock operations applied
    lock1(0~100) --> add lock1 to file
    lock2(120~200) --> add lock2 to file
    lock3(50~150) --> extend lock1 to cover range 0~200 and release lock2
T2: restart nfs-server and run state manager
T3: open in write-only mode
    T1                            T2                                T3
                            start recover
lock1
lock2
                            nfs4_open_reclaim
                            clear_bit // NFS_DELEGATED_STATE
lock3
 _nfs4_proc_setlk
  lock so_delegreturn_mutex
  unlock so_delegreturn_mutex
  _nfs4_do_setlk
                            recover done
                                                lock so_delegreturn_mutex
                                                nfs_delegation_claim_locks
                                                get lock2
   rpc_run_task
   ...
   nfs4_lock_done
    locks_lock_inode_wait
    ...
     locks_dispose_list
     free lock2
                                                use lock2
                                                // UAF
                                                unlock so_delegreturn_mutex

Protect file lock by nfsi->rwsem to fix this issue.

Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
Reported-by: zhangjian (CG) <zhangjian496@huawei.com>
Suggested-by: yangerkun <yangerkun@huawei.com>
Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
---
Changes in v2:
  Use nfsi->rwsem instead of sp->so_delegreturn_mutex to prevent concurrency. 

 fs/nfs/delegation.c | 5 ++++-
 fs/nfs/nfs4proc.c   | 8 +++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 10ef46e29b25..4467b4f61905 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -149,15 +149,17 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
 static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct inode *inode = state->inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
 	struct file_lock *fl;
 	struct file_lock_context *flctx = locks_inode_context(inode);
 	struct list_head *list;
 	int status = 0;
 
 	if (flctx == NULL)
-		goto out;
+		return status;
 
 	list = &flctx->flc_posix;
+	down_write(&nfsi->rwsem);
 	spin_lock(&flctx->flc_lock);
 restart:
 	for_each_file_lock(fl, list) {
@@ -175,6 +177,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
 	}
 	spin_unlock(&flctx->flc_lock);
 out:
+	up_write(&nfsi->rwsem);
 	return status;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 341740fa293d..06f109c7eb2e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7294,14 +7294,18 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	status = -ENOMEM;
 	if (IS_ERR(seqid))
 		goto out;
+	down_read(&nfsi->rwsem);
 	task = nfs4_do_unlck(request,
 			     nfs_file_open_context(request->c.flc_file),
 			     lsp, seqid);
 	status = PTR_ERR(task);
-	if (IS_ERR(task))
+	if (IS_ERR(task)) {
+		up_read(&nfsi->rwsem);
 		goto out;
+	}
 	status = rpc_wait_for_completion_task(task);
 	rpc_put_task(task);
+	up_read(&nfsi->rwsem);
 out:
 	request->c.flc_flags = saved_flags;
 	trace_nfs4_unlock(request, state, F_SETLK, status);
@@ -7642,7 +7646,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	}
 	up_read(&nfsi->rwsem);
 	mutex_unlock(&sp->so_delegreturn_mutex);
+	down_read(&nfsi->rwsem);
 	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+	up_read(&nfsi->rwsem);
 out:
 	request->c.flc_flags = flags;
 	return status;
-- 
2.31.1

Re: [PATCH v2] nfs: fix the race of lock/unlock and open
Posted by Li Lingfeng 5 months, 1 week ago
Friendly ping..

Thanks

在 2025/7/15 11:05, Li Lingfeng 写道:
> LOCK may extend an existing lock and release another one and UNLOCK may
> also release an existing lock.
> When opening a file, there may be access to file locks that have been
> concurrently released by lock/unlock operations, potentially triggering
> UAF.
> While certain concurrent scenarios involving lock/unlock and open
> operations have been safeguarded with locks – for example,
> nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
> locks_lock_inode_wait() – there remain cases where such protection is not
> yet implemented.
>
> The issue can be reproduced through the following steps:
> T1: open in read-only mode with three consecutive lock operations applied
>      lock1(0~100) --> add lock1 to file
>      lock2(120~200) --> add lock2 to file
>      lock3(50~150) --> extend lock1 to cover range 0~200 and release lock2
> T2: restart nfs-server and run state manager
> T3: open in write-only mode
>      T1                            T2                                T3
>                              start recover
> lock1
> lock2
>                              nfs4_open_reclaim
>                              clear_bit // NFS_DELEGATED_STATE
> lock3
>   _nfs4_proc_setlk
>    lock so_delegreturn_mutex
>    unlock so_delegreturn_mutex
>    _nfs4_do_setlk
>                              recover done
>                                                  lock so_delegreturn_mutex
>                                                  nfs_delegation_claim_locks
>                                                  get lock2
>     rpc_run_task
>     ...
>     nfs4_lock_done
>      locks_lock_inode_wait
>      ...
>       locks_dispose_list
>       free lock2
>                                                  use lock2
>                                                  // UAF
>                                                  unlock so_delegreturn_mutex
>
> Protect file lock by nfsi->rwsem to fix this issue.
>
> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
> Reported-by: zhangjian (CG) <zhangjian496@huawei.com>
> Suggested-by: yangerkun <yangerkun@huawei.com>
> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> ---
> Changes in v2:
>    Use nfsi->rwsem instead of sp->so_delegreturn_mutex to prevent concurrency.
>
>   fs/nfs/delegation.c | 5 ++++-
>   fs/nfs/nfs4proc.c   | 8 +++++++-
>   2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> index 10ef46e29b25..4467b4f61905 100644
> --- a/fs/nfs/delegation.c
> +++ b/fs/nfs/delegation.c
> @@ -149,15 +149,17 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
>   static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
>   {
>   	struct inode *inode = state->inode;
> +	struct nfs_inode *nfsi = NFS_I(inode);
>   	struct file_lock *fl;
>   	struct file_lock_context *flctx = locks_inode_context(inode);
>   	struct list_head *list;
>   	int status = 0;
>   
>   	if (flctx == NULL)
> -		goto out;
> +		return status;
>   
>   	list = &flctx->flc_posix;
> +	down_write(&nfsi->rwsem);
>   	spin_lock(&flctx->flc_lock);
>   restart:
>   	for_each_file_lock(fl, list) {
> @@ -175,6 +177,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
>   	}
>   	spin_unlock(&flctx->flc_lock);
>   out:
> +	up_write(&nfsi->rwsem);
>   	return status;
>   }
>   
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 341740fa293d..06f109c7eb2e 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -7294,14 +7294,18 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
>   	status = -ENOMEM;
>   	if (IS_ERR(seqid))
>   		goto out;
> +	down_read(&nfsi->rwsem);
>   	task = nfs4_do_unlck(request,
>   			     nfs_file_open_context(request->c.flc_file),
>   			     lsp, seqid);
>   	status = PTR_ERR(task);
> -	if (IS_ERR(task))
> +	if (IS_ERR(task)) {
> +		up_read(&nfsi->rwsem);
>   		goto out;
> +	}
>   	status = rpc_wait_for_completion_task(task);
>   	rpc_put_task(task);
> +	up_read(&nfsi->rwsem);
>   out:
>   	request->c.flc_flags = saved_flags;
>   	trace_nfs4_unlock(request, state, F_SETLK, status);
> @@ -7642,7 +7646,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
>   	}
>   	up_read(&nfsi->rwsem);
>   	mutex_unlock(&sp->so_delegreturn_mutex);
> +	down_read(&nfsi->rwsem);
>   	status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
> +	up_read(&nfsi->rwsem);
>   out:
>   	request->c.flc_flags = flags;
>   	return status;
Re: [PATCH v2] nfs: fix the race of lock/unlock and open
Posted by Li Lingfeng 1 month ago
Hi,

Recently, we found that this solution can introduce a deadlock issue:
         T1
nfs_flock
  do_unlk
   nfs4_proc_lock
    nfs4_proc_unlck
     down_read // holding &nfsi->rwsem
     nfs4_do_unlck
     rpc_wait_for_completion_task // waiting for the rpc_task to complete

// .rpc_call_done
nfs4_locku_done
  nfs4_async_handle_exception
   nfs4_do_handle_exception
    exception->recovering = 1
   rpc_sleep_on // the rpc_task sleeps on &clp->cl_rpcwaitq, waiting to 
be woken up by T2

         T2
nfs4_state_manager
  nfs4_do_reclaim
   nfs4_reclaim_open_state
    __nfs4_reclaim_open_state
     nfs4_reclaim_locks
      down_write // tries to acquire &nfsi->rwsem and gets stuck

It seems that using &nfsi->rwsem to protect file locks is not a good idea.
Does anyone have a viable approach to address this UAF issue?

Thanks,
Lingfeng.

在 2025/9/1 22:25, Li Lingfeng 写道:
> Friendly ping..
>
> Thanks
>
> 在 2025/7/15 11:05, Li Lingfeng 写道:
>> LOCK may extend an existing lock and release another one and UNLOCK may
>> also release an existing lock.
>> When opening a file, there may be access to file locks that have been
>> concurrently released by lock/unlock operations, potentially triggering
>> UAF.
>> While certain concurrent scenarios involving lock/unlock and open
>> operations have been safeguarded with locks – for example,
>> nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
>> locks_lock_inode_wait() – there remain cases where such protection is 
>> not
>> yet implemented.
>>
>> The issue can be reproduced through the following steps:
>> T1: open in read-only mode with three consecutive lock operations 
>> applied
>>      lock1(0~100) --> add lock1 to file
>>      lock2(120~200) --> add lock2 to file
>>      lock3(50~150) --> extend lock1 to cover range 0~200 and release 
>> lock2
>> T2: restart nfs-server and run state manager
>> T3: open in write-only mode
>>      T1 T2                                T3
>>                              start recover
>> lock1
>> lock2
>>                              nfs4_open_reclaim
>>                              clear_bit // NFS_DELEGATED_STATE
>> lock3
>>   _nfs4_proc_setlk
>>    lock so_delegreturn_mutex
>>    unlock so_delegreturn_mutex
>>    _nfs4_do_setlk
>>                              recover done
>>                                                  lock 
>> so_delegreturn_mutex
>> nfs_delegation_claim_locks
>>                                                  get lock2
>>     rpc_run_task
>>     ...
>>     nfs4_lock_done
>>      locks_lock_inode_wait
>>      ...
>>       locks_dispose_list
>>       free lock2
>>                                                  use lock2
>>                                                  // UAF
>>                                                  unlock 
>> so_delegreturn_mutex
>>
>> Protect file lock by nfsi->rwsem to fix this issue.
>>
>> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be 
>> atomic with the stateid update")
>> Reported-by: zhangjian (CG) <zhangjian496@huawei.com>
>> Suggested-by: yangerkun <yangerkun@huawei.com>
>> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
>> ---
>> Changes in v2:
>>    Use nfsi->rwsem instead of sp->so_delegreturn_mutex to prevent 
>> concurrency.
>>
>>   fs/nfs/delegation.c | 5 ++++-
>>   fs/nfs/nfs4proc.c   | 8 +++++++-
>>   2 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
>> index 10ef46e29b25..4467b4f61905 100644
>> --- a/fs/nfs/delegation.c
>> +++ b/fs/nfs/delegation.c
>> @@ -149,15 +149,17 @@ int nfs4_check_delegation(struct inode *inode, 
>> fmode_t type)
>>   static int nfs_delegation_claim_locks(struct nfs4_state *state, 
>> const nfs4_stateid *stateid)
>>   {
>>       struct inode *inode = state->inode;
>> +    struct nfs_inode *nfsi = NFS_I(inode);
>>       struct file_lock *fl;
>>       struct file_lock_context *flctx = locks_inode_context(inode);
>>       struct list_head *list;
>>       int status = 0;
>>         if (flctx == NULL)
>> -        goto out;
>> +        return status;
>>         list = &flctx->flc_posix;
>> +    down_write(&nfsi->rwsem);
>>       spin_lock(&flctx->flc_lock);
>>   restart:
>>       for_each_file_lock(fl, list) {
>> @@ -175,6 +177,7 @@ static int nfs_delegation_claim_locks(struct 
>> nfs4_state *state, const nfs4_state
>>       }
>>       spin_unlock(&flctx->flc_lock);
>>   out:
>> +    up_write(&nfsi->rwsem);
>>       return status;
>>   }
>>   diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 341740fa293d..06f109c7eb2e 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -7294,14 +7294,18 @@ static int nfs4_proc_unlck(struct nfs4_state 
>> *state, int cmd, struct file_lock *
>>       status = -ENOMEM;
>>       if (IS_ERR(seqid))
>>           goto out;
>> +    down_read(&nfsi->rwsem);
>>       task = nfs4_do_unlck(request,
>> nfs_file_open_context(request->c.flc_file),
>>                    lsp, seqid);
>>       status = PTR_ERR(task);
>> -    if (IS_ERR(task))
>> +    if (IS_ERR(task)) {
>> +        up_read(&nfsi->rwsem);
>>           goto out;
>> +    }
>>       status = rpc_wait_for_completion_task(task);
>>       rpc_put_task(task);
>> +    up_read(&nfsi->rwsem);
>>   out:
>>       request->c.flc_flags = saved_flags;
>>       trace_nfs4_unlock(request, state, F_SETLK, status);
>> @@ -7642,7 +7646,9 @@ static int _nfs4_proc_setlk(struct nfs4_state 
>> *state, int cmd, struct file_lock
>>       }
>>       up_read(&nfsi->rwsem);
>>       mutex_unlock(&sp->so_delegreturn_mutex);
>> +    down_read(&nfsi->rwsem);
>>       status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
>> +    up_read(&nfsi->rwsem);
>>   out:
>>       request->c.flc_flags = flags;
>>       return status;
Re: [PATCH v2] nfs: fix the race of lock/unlock and open
Posted by Li Lingfeng 5 days, 5 hours ago
Hi,

I tried separating the local VFS lock update from the RPC completion path
(performing an unlock if the VFS lock update fails), and then using
nfsi->rwsem to protect the file lock to prevent UAF.

Split local VFS lock update from RPC completion path:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 491fbe65e644..41d66e34851b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6887,7 +6887,7 @@ static struct rpc_task *nfs4_do_unlck(struct 
file_lock *fl,
         return rpc_run_task(&task_setup_data);
  }

-static int nfs4_proc_unlck(struct nfs4_state *state, struct file_lock 
*request)
+static int nfs4_proc_unlck(struct nfs4_state *state, struct file_lock 
*request, int remote_only)
  {
         struct inode *inode = state->inode;
         struct nfs4_state_owner *sp = state->owner;
@@ -6906,7 +6906,7 @@ static int nfs4_proc_unlck(struct nfs4_state 
*state, struct file_lock *request)
         mutex_lock(&sp->so_delegreturn_mutex);
         /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
         down_read(&nfsi->rwsem);
-       if (locks_lock_inode_wait(inode, request) == -ENOENT) {
+       if (!remote_only && locks_lock_inode_wait(inode, request) == 
-ENOENT) {
                 up_read(&nfsi->rwsem);
                 mutex_unlock(&sp->so_delegreturn_mutex);
                 goto out;
@@ -7044,11 +7044,6 @@ static void nfs4_lock_done(struct rpc_task *task, 
void *calldata)
         case 0:
renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
                                 data->timestamp);
-               if (data->arg.new_lock && !data->cancelled) {
-                       data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
-                       if (locks_lock_inode_wait(lsp->ls_state->inode, 
&data->fl) < 0)
-                               goto out_restart;
-               }
                 if (data->arg.new_lock_owner != 0) {
                         nfs_confirm_seqid(&lsp->ls_seqid, 0);
                         nfs4_stateid_copy(&lsp->ls_stateid, 
&data->res.stateid);
@@ -7254,7 +7249,7 @@ static int _nfs4_proc_setlk(struct nfs4_state 
*state, int cmd, struct file_lock
         struct nfs_inode *nfsi = NFS_I(state->inode);
         struct nfs4_state_owner *sp = state->owner;
         unsigned char fl_flags = request->fl_flags;
-       int status;
+       int status, ret;

         request->fl_flags |= FL_ACCESS;
         status = locks_lock_inode_wait(state->inode, request);
@@ -7274,6 +7269,16 @@ static int _nfs4_proc_setlk(struct nfs4_state 
*state, int cmd, struct file_lock
         up_read(&nfsi->rwsem);
         mutex_unlock(&sp->so_delegreturn_mutex);
         status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+       if (status != 0)
+               goto out;
+
+       request->fl_flags = fl_flags & ~(FL_SLEEP | FL_ACCESS);
+       status = locks_lock_inode_wait(state->inode, request);
+       if (status) {
+               ret = nfs4_proc_unlck(state, request, 1);
+               status = ret ? ret : status;
+               dprintk("%s: cancelling lock!\n", __func__);
+       }
  out:
         request->fl_flags = fl_flags;
         return status;
@@ -7428,7 +7433,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct 
file_lock *request)

         if (request->fl_type == F_UNLCK) {
                 if (state != NULL)
-                       return nfs4_proc_unlck(state, request);
+                       return nfs4_proc_unlck(state, request, 0);
                 return 0;
         }

Protect file lock by nfsi->rwsem:
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c2eb01e5eeab..251a0b196d05 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -145,15 +145,17 @@ int nfs4_check_delegation(struct inode *inode, 
fmode_t flags)
  static int nfs_delegation_claim_locks(struct nfs4_state *state, const 
nfs4_stateid *stateid)
  {
         struct inode *inode = state->inode;
+       struct nfs_inode *nfsi = NFS_I(inode);
         struct file_lock *fl;
         struct file_lock_context *flctx = inode->i_flctx;
         struct list_head *list;
         int status = 0;

         if (flctx == NULL)
-               goto out;
+               return status;

         list = &flctx->flc_posix;
+       down_write(&nfsi->rwsem);
         spin_lock(&flctx->flc_lock);
  restart:
         list_for_each_entry(fl, list, fl_list) {
@@ -171,6 +173,7 @@ static int nfs_delegation_claim_locks(struct 
nfs4_state *state, const nfs4_state
         }
         spin_unlock(&flctx->flc_lock);
  out:
+       up_write(&nfsi->rwsem);
         return status;
  }

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 41d66e34851b..e763423c81ec 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6770,18 +6770,23 @@ static void nfs4_locku_done(struct rpc_task 
*task, void *data)
                 .stateid = &calldata->arg.stateid,
         };
         int status;
+       struct nfs_inode *nfsi;

         if (!nfs4_sequence_done(task, &calldata->res.seq_res))
                 return;
         switch (task->tk_status) {
                 case 0:
+                       nfsi = NFS_I(calldata->lsp->ls_state->inode);
                         renew_lease(calldata->server, calldata->timestamp);
+                       down_read(&nfsi->rwsem);
                         status = 
locks_lock_inode_wait(calldata->lsp->ls_state->inode,
  &calldata->fl);
                         if (status && (status != -ENOENT)) {
+                               up_read(&nfsi->rwsem);
                                 rpc_restart_call_prepare(task);
                                 break;
                         }
+                       up_read(&nfsi->rwsem);
                         if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
                                 break;
@@ -7273,7 +7278,9 @@ static int _nfs4_proc_setlk(struct nfs4_state 
*state, int cmd, struct file_lock
                 goto out;

         request->fl_flags = fl_flags & ~(FL_SLEEP | FL_ACCESS);
+       down_read(&nfsi->rwsem);
         status = locks_lock_inode_wait(state->inode, request);
+       up_read(&nfsi->rwsem);
         if (status) {
                 ret = nfs4_proc_unlck(state, request, 1);
                 status = ret ? ret : status;

However, this partially reverts the logic from commit c69899a17ca4
('NFSv4: Update of VFS byte range lock must be atomic with the stateid
update'), making the VFS lock update non-atomic with the stateid update.
I'm not sure if this might cause any issues.

Does anyone have any thoughts on this solution, or perhaps alternative
approaches?

Thanks,
Lingfeng.

在 2026/1/7 19:07, Li Lingfeng 写道:
> Hi,
>
> Recently, we found that this solution can introduce a deadlock issue:
>         T1
> nfs_flock
>  do_unlk
>   nfs4_proc_lock
>    nfs4_proc_unlck
>     down_read // holding &nfsi->rwsem
>     nfs4_do_unlck
>     rpc_wait_for_completion_task // waiting for the rpc_task to complete
>
> // .rpc_call_done
> nfs4_locku_done
>  nfs4_async_handle_exception
>   nfs4_do_handle_exception
>    exception->recovering = 1
>   rpc_sleep_on // the rpc_task sleeps on &clp->cl_rpcwaitq, waiting to 
> be woken up by T2
>
>         T2
> nfs4_state_manager
>  nfs4_do_reclaim
>   nfs4_reclaim_open_state
>    __nfs4_reclaim_open_state
>     nfs4_reclaim_locks
>      down_write // tries to acquire &nfsi->rwsem and gets stuck
>
> It seems that using &nfsi->rwsem to protect file locks is not a good 
> idea.
> Does anyone have a viable approach to address this UAF issue?
>
> Thanks,
> Lingfeng.
>
> 在 2025/9/1 22:25, Li Lingfeng 写道:
>> Friendly ping..
>>
>> Thanks
>>
>> 在 2025/7/15 11:05, Li Lingfeng 写道:
>>> LOCK may extend an existing lock and release another one and UNLOCK may
>>> also release an existing lock.
>>> When opening a file, there may be access to file locks that have been
>>> concurrently released by lock/unlock operations, potentially triggering
>>> UAF.
>>> While certain concurrent scenarios involving lock/unlock and open
>>> operations have been safeguarded with locks – for example,
>>> nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
>>> locks_lock_inode_wait() – there remain cases where such protection 
>>> is not
>>> yet implemented.
>>>
>>> The issue can be reproduced through the following steps:
>>> T1: open in read-only mode with three consecutive lock operations 
>>> applied
>>>      lock1(0~100) --> add lock1 to file
>>>      lock2(120~200) --> add lock2 to file
>>>      lock3(50~150) --> extend lock1 to cover range 0~200 and release 
>>> lock2
>>> T2: restart nfs-server and run state manager
>>> T3: open in write-only mode
>>>      T1 T2                                T3
>>>                              start recover
>>> lock1
>>> lock2
>>>                              nfs4_open_reclaim
>>>                              clear_bit // NFS_DELEGATED_STATE
>>> lock3
>>>   _nfs4_proc_setlk
>>>    lock so_delegreturn_mutex
>>>    unlock so_delegreturn_mutex
>>>    _nfs4_do_setlk
>>>                              recover done
>>>                                                  lock 
>>> so_delegreturn_mutex
>>> nfs_delegation_claim_locks
>>>                                                  get lock2
>>>     rpc_run_task
>>>     ...
>>>     nfs4_lock_done
>>>      locks_lock_inode_wait
>>>      ...
>>>       locks_dispose_list
>>>       free lock2
>>>                                                  use lock2
>>>                                                  // UAF
>>>                                                  unlock 
>>> so_delegreturn_mutex
>>>
>>> Protect file lock by nfsi->rwsem to fix this issue.
>>>
>>> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be 
>>> atomic with the stateid update")
>>> Reported-by: zhangjian (CG) <zhangjian496@huawei.com>
>>> Suggested-by: yangerkun <yangerkun@huawei.com>
>>> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
>>> ---
>>> Changes in v2:
>>>    Use nfsi->rwsem instead of sp->so_delegreturn_mutex to prevent 
>>> concurrency.
>>>
>>>   fs/nfs/delegation.c | 5 ++++-
>>>   fs/nfs/nfs4proc.c   | 8 +++++++-
>>>   2 files changed, 11 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
>>> index 10ef46e29b25..4467b4f61905 100644
>>> --- a/fs/nfs/delegation.c
>>> +++ b/fs/nfs/delegation.c
>>> @@ -149,15 +149,17 @@ int nfs4_check_delegation(struct inode *inode, 
>>> fmode_t type)
>>>   static int nfs_delegation_claim_locks(struct nfs4_state *state, 
>>> const nfs4_stateid *stateid)
>>>   {
>>>       struct inode *inode = state->inode;
>>> +    struct nfs_inode *nfsi = NFS_I(inode);
>>>       struct file_lock *fl;
>>>       struct file_lock_context *flctx = locks_inode_context(inode);
>>>       struct list_head *list;
>>>       int status = 0;
>>>         if (flctx == NULL)
>>> -        goto out;
>>> +        return status;
>>>         list = &flctx->flc_posix;
>>> +    down_write(&nfsi->rwsem);
>>>       spin_lock(&flctx->flc_lock);
>>>   restart:
>>>       for_each_file_lock(fl, list) {
>>> @@ -175,6 +177,7 @@ static int nfs_delegation_claim_locks(struct 
>>> nfs4_state *state, const nfs4_state
>>>       }
>>>       spin_unlock(&flctx->flc_lock);
>>>   out:
>>> +    up_write(&nfsi->rwsem);
>>>       return status;
>>>   }
>>>   diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>> index 341740fa293d..06f109c7eb2e 100644
>>> --- a/fs/nfs/nfs4proc.c
>>> +++ b/fs/nfs/nfs4proc.c
>>> @@ -7294,14 +7294,18 @@ static int nfs4_proc_unlck(struct nfs4_state 
>>> *state, int cmd, struct file_lock *
>>>       status = -ENOMEM;
>>>       if (IS_ERR(seqid))
>>>           goto out;
>>> +    down_read(&nfsi->rwsem);
>>>       task = nfs4_do_unlck(request,
>>> nfs_file_open_context(request->c.flc_file),
>>>                    lsp, seqid);
>>>       status = PTR_ERR(task);
>>> -    if (IS_ERR(task))
>>> +    if (IS_ERR(task)) {
>>> +        up_read(&nfsi->rwsem);
>>>           goto out;
>>> +    }
>>>       status = rpc_wait_for_completion_task(task);
>>>       rpc_put_task(task);
>>> +    up_read(&nfsi->rwsem);
>>>   out:
>>>       request->c.flc_flags = saved_flags;
>>>       trace_nfs4_unlock(request, state, F_SETLK, status);
>>> @@ -7642,7 +7646,9 @@ static int _nfs4_proc_setlk(struct nfs4_state 
>>> *state, int cmd, struct file_lock
>>>       }
>>>       up_read(&nfsi->rwsem);
>>>       mutex_unlock(&sp->so_delegreturn_mutex);
>>> +    down_read(&nfsi->rwsem);
>>>       status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
>>> +    up_read(&nfsi->rwsem);
>>>   out:
>>>       request->c.flc_flags = flags;
>>>       return status;